summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.ci/scripts/common/post-upload.sh15
-rw-r--r--.ci/scripts/common/pre-upload.sh6
-rw-r--r--.ci/scripts/format/docker.sh6
-rw-r--r--.ci/scripts/format/exec.sh4
-rw-r--r--.ci/scripts/format/script.sh37
-rw-r--r--.ci/scripts/linux/docker.sh14
-rw-r--r--.ci/scripts/linux/exec.sh5
-rw-r--r--.ci/scripts/linux/upload.sh14
-rw-r--r--.ci/scripts/merge/apply-patches-by-label.py28
-rw-r--r--.ci/scripts/merge/check-label-presence.py18
-rw-r--r--.ci/scripts/merge/yuzubot-git-config.sh2
-rw-r--r--.ci/scripts/windows/docker.sh50
-rw-r--r--.ci/scripts/windows/exec.sh5
-rw-r--r--.ci/scripts/windows/scan_dll.py106
-rw-r--r--.ci/scripts/windows/upload.sh13
-rw-r--r--.ci/templates/build-single.yml23
-rw-r--r--.ci/templates/build-standard.yml23
-rw-r--r--.ci/templates/build-testing.yml33
-rw-r--r--.ci/templates/format-check.yml14
-rw-r--r--.ci/templates/merge.yml46
-rw-r--r--.ci/templates/mergebot.yml15
-rw-r--r--.ci/templates/retrieve-artifact-source.yml16
-rw-r--r--.ci/templates/retrieve-master-source.yml11
-rw-r--r--.ci/templates/sync-source.yml7
-rw-r--r--.ci/yuzu-mainline.yml25
-rw-r--r--.ci/yuzu-patreon.yml19
-rw-r--r--.ci/yuzu-repo-sync.yml19
-rw-r--r--.ci/yuzu-verify.yml20
-rw-r--r--CMakeModules/GenerateSCMRev.cmake4
-rw-r--r--README.md1
-rw-r--r--dist/license.md31
-rw-r--r--dist/qt_themes/colorful/icons/16x16/lock.pngbin0 -> 330 bytes
-rw-r--r--dist/qt_themes/colorful/icons/256x256/plus_folder.pngbin0 -> 4643 bytes
-rw-r--r--dist/qt_themes/colorful/icons/48x48/bad_folder.pngbin0 -> 15494 bytes
-rw-r--r--dist/qt_themes/colorful/icons/48x48/chip.pngbin0 -> 582 bytes
-rw-r--r--dist/qt_themes/colorful/icons/48x48/folder.pngbin0 -> 460 bytes
-rw-r--r--dist/qt_themes/colorful/icons/48x48/plus.pngbin0 -> 496 bytes
-rw-r--r--dist/qt_themes/colorful/icons/48x48/sd_card.pngbin0 -> 680 bytes
-rw-r--r--dist/qt_themes/colorful/icons/index.theme14
-rw-r--r--dist/qt_themes/colorful/style.qrc15
-rw-r--r--dist/qt_themes/colorful/style.qss4
-rw-r--r--dist/qt_themes/colorful_dark/icons/16x16/lock.pngbin0 -> 401 bytes
-rw-r--r--dist/qt_themes/colorful_dark/icons/index.theme8
-rw-r--r--dist/qt_themes/colorful_dark/style.qrc57
-rw-r--r--dist/qt_themes/default/default.qrc14
-rw-r--r--dist/qt_themes/default/icons/16x16/lock.pngbin0 -> 279 bytes
-rw-r--r--dist/qt_themes/default/icons/256x256/plus_folder.pngbin0 -> 3135 bytes
-rw-r--r--dist/qt_themes/default/icons/48x48/bad_folder.pngbin0 -> 1088 bytes
-rw-r--r--dist/qt_themes/default/icons/48x48/chip.pngbin0 -> 15070 bytes
-rw-r--r--dist/qt_themes/default/icons/48x48/folder.pngbin0 -> 410 bytes
-rw-r--r--dist/qt_themes/default/icons/48x48/plus.pngbin0 -> 316 bytes
-rw-r--r--dist/qt_themes/default/icons/48x48/sd_card.pngbin0 -> 614 bytes
-rw-r--r--dist/qt_themes/default/icons/index.theme5
-rw-r--r--dist/qt_themes/qdarkstyle/icons/16x16/lock.pngbin0 -> 304 bytes
-rw-r--r--dist/qt_themes/qdarkstyle/icons/256x256/plus_folder.pngbin0 -> 3438 bytes
-rw-r--r--dist/qt_themes/qdarkstyle/icons/48x48/bad_folder.pngbin0 -> 1098 bytes
-rw-r--r--dist/qt_themes/qdarkstyle/icons/48x48/chip.pngbin0 -> 15120 bytes
-rw-r--r--dist/qt_themes/qdarkstyle/icons/48x48/folder.pngbin0 -> 542 bytes
-rw-r--r--dist/qt_themes/qdarkstyle/icons/48x48/plus.pngbin0 -> 339 bytes
-rw-r--r--dist/qt_themes/qdarkstyle/icons/48x48/sd_card.pngbin0 -> 676 bytes
-rw-r--r--dist/qt_themes/qdarkstyle/icons/index.theme7
-rw-r--r--dist/qt_themes/qdarkstyle/style.qrc7
m---------externals/fmt0
-rw-r--r--license.txt16
-rw-r--r--src/audio_core/audio_renderer.cpp14
-rw-r--r--src/audio_core/audio_renderer.h3
-rw-r--r--src/common/CMakeLists.txt5
-rw-r--r--src/common/alignment.h66
-rw-r--r--src/common/binary_find.h21
-rw-r--r--src/common/bit_util.h44
-rw-r--r--src/common/common_funcs.h1
-rw-r--r--src/core/CMakeLists.txt12
-rw-r--r--src/core/arm/arm_interface.h7
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp9
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.h3
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp18
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h3
-rw-r--r--src/core/core.cpp29
-rw-r--r--src/core/core.h22
-rw-r--r--src/core/core_cpu.cpp19
-rw-r--r--src/core/crypto/key_manager.cpp241
-rw-r--r--src/core/crypto/key_manager.h116
-rw-r--r--src/core/file_sys/program_metadata.cpp4
-rw-r--r--src/core/file_sys/program_metadata.h4
-rw-r--r--src/core/file_sys/system_archive/mii_model.cpp46
-rw-r--r--src/core/file_sys/system_archive/mii_model.h13
-rw-r--r--src/core/file_sys/system_archive/system_archive.cpp3
-rw-r--r--src/core/hardware_interrupt_manager.cpp30
-rw-r--r--src/core/hardware_interrupt_manager.h31
-rw-r--r--src/core/hle/kernel/code_set.h3
-rw-r--r--src/core/hle/kernel/physical_memory.h19
-rw-r--r--src/core/hle/kernel/process.cpp182
-rw-r--r--src/core/hle/kernel/process.h57
-rw-r--r--src/core/hle/kernel/shared_memory.cpp6
-rw-r--r--src/core/hle/kernel/shared_memory.h13
-rw-r--r--src/core/hle/kernel/svc.cpp158
-rw-r--r--src/core/hle/kernel/svc_wrap.h5
-rw-r--r--src/core/hle/kernel/thread.cpp4
-rw-r--r--src/core/hle/kernel/thread.h16
-rw-r--r--src/core/hle/kernel/transfer_memory.cpp2
-rw-r--r--src/core/hle/kernel/transfer_memory.h3
-rw-r--r--src/core/hle/kernel/vm_manager.cpp397
-rw-r--r--src/core/hle/kernel/vm_manager.h108
-rw-r--r--src/core/hle/service/acc/acc.cpp73
-rw-r--r--src/core/hle/service/acc/acc.h24
-rw-r--r--src/core/hle/service/acc/acc_u0.cpp4
-rw-r--r--src/core/hle/service/acc/errors.h14
-rw-r--r--src/core/hle/service/am/am.cpp147
-rw-r--r--src/core/hle/service/am/am.h40
-rw-r--r--src/core/hle/service/am/applet_ae.cpp18
-rw-r--r--src/core/hle/service/am/applet_oe.cpp11
-rw-r--r--src/core/hle/service/am/applets/applets.cpp25
-rw-r--r--src/core/hle/service/am/applets/applets.h17
-rw-r--r--src/core/hle/service/am/applets/error.cpp7
-rw-r--r--src/core/hle/service/am/applets/error.h7
-rw-r--r--src/core/hle/service/am/applets/general_backend.cpp13
-rw-r--r--src/core/hle/service/am/applets/general_backend.h12
-rw-r--r--src/core/hle/service/am/applets/profile_select.cpp5
-rw-r--r--src/core/hle/service/am/applets/profile_select.h7
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp5
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.h7
-rw-r--r--src/core/hle/service/am/applets/web_browser.cpp19
-rw-r--r--src/core/hle/service/am/applets/web_browser.h12
-rw-r--r--src/core/hle/service/apm/apm.cpp13
-rw-r--r--src/core/hle/service/apm/apm.h7
-rw-r--r--src/core/hle/service/apm/controller.cpp68
-rw-r--r--src/core/hle/service/apm/controller.h70
-rw-r--r--src/core/hle/service/apm/interface.cpp82
-rw-r--r--src/core/hle/service/apm/interface.h14
-rw-r--r--src/core/hle/service/audio/audio.cpp6
-rw-r--r--src/core/hle/service/audio/audio.h6
-rw-r--r--src/core/hle/service/audio/audout_u.cpp36
-rw-r--r--src/core/hle/service/audio/audout_u.h12
-rw-r--r--src/core/hle/service/audio/audren_u.cpp257
-rw-r--r--src/core/hle/service/audio/audren_u.h24
-rw-r--r--src/core/hle/service/es/es.cpp230
-rw-r--r--src/core/hle/service/fatal/fatal.cpp2
-rw-r--r--src/core/hle/service/filesystem/filesystem.cpp10
-rw-r--r--src/core/hle/service/filesystem/filesystem.h2
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.cpp54
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.h26
-rw-r--r--src/core/hle/service/friend/errors.h12
-rw-r--r--src/core/hle/service/friend/friend.cpp150
-rw-r--r--src/core/hle/service/friend/friend.h1
-rw-r--r--src/core/hle/service/friend/interface.cpp2
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp37
-rw-r--r--src/core/hle/service/hid/controllers/npad.h6
-rw-r--r--src/core/hle/service/hid/errors.h13
-rw-r--r--src/core/hle/service/hid/hid.cpp75
-rw-r--r--src/core/hle/service/hid/hid.h5
-rw-r--r--src/core/hle/service/ldr/ldr.cpp32
-rw-r--r--src/core/hle/service/mii/mii.cpp16
-rw-r--r--src/core/hle/service/mii/mii_manager.cpp4
-rw-r--r--src/core/hle/service/ns/pl_u.cpp12
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdevice.h13
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp11
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp15
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp152
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.h15
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp7
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp44
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.h41
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_vic.cpp5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_vic.h5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvmap.cpp5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvmap.h5
-rw-r--r--src/core/hle/service/nvdrv/interface.cpp48
-rw-r--r--src/core/hle/service/nvdrv/interface.h4
-rw-r--r--src/core/hle/service/nvdrv/nvdata.h48
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.cpp59
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.h88
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp23
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h11
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp23
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.h4
-rw-r--r--src/core/hle/service/pm/pm.cpp124
-rw-r--r--src/core/hle/service/pm/pm.h6
-rw-r--r--src/core/hle/service/service.cpp15
-rw-r--r--src/core/hle/service/service.h3
-rw-r--r--src/core/hle/service/set/set.cpp10
-rw-r--r--src/core/hle/service/set/set.h1
-rw-r--r--src/core/hle/service/time/interface.cpp11
-rw-r--r--src/core/hle/service/time/interface.h5
-rw-r--r--src/core/hle/service/time/time.cpp115
-rw-r--r--src/core/hle/service/time/time.h11
-rw-r--r--src/core/hle/service/time/time_sharedmemory.cpp68
-rw-r--r--src/core/hle/service/time/time_sharedmemory.h74
-rw-r--r--src/core/hle/service/vi/vi.cpp48
-rw-r--r--src/core/loader/elf.cpp2
-rw-r--r--src/core/loader/kip.cpp2
-rw-r--r--src/core/loader/nro.cpp11
-rw-r--r--src/core/loader/nro.h1
-rw-r--r--src/core/loader/nso.cpp2
-rw-r--r--src/core/memory.cpp2
-rw-r--r--src/core/memory.h4
-rw-r--r--src/core/reporter.cpp60
-rw-r--r--src/core/reporter.h15
-rw-r--r--src/core/settings.cpp1
-rw-r--r--src/core/settings.h2
-rw-r--r--src/core/telemetry_session.cpp1
-rw-r--r--src/video_core/CMakeLists.txt24
-rw-r--r--src/video_core/buffer_cache/buffer_block.h76
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h447
-rw-r--r--src/video_core/buffer_cache/map_interval.h89
-rw-r--r--src/video_core/dma_pusher.cpp3
-rw-r--r--src/video_core/engines/engine_upload.cpp6
-rw-r--r--src/video_core/engines/engine_upload.h6
-rw-r--r--src/video_core/engines/fermi_2d.cpp28
-rw-r--r--src/video_core/engines/fermi_2d.h56
-rw-r--r--src/video_core/engines/kepler_compute.cpp62
-rw-r--r--src/video_core/engines/kepler_compute.h23
-rw-r--r--src/video_core/engines/kepler_memory.cpp4
-rw-r--r--src/video_core/engines/kepler_memory.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp341
-rw-r--r--src/video_core/engines/maxwell_3d.h141
-rw-r--r--src/video_core/engines/maxwell_dma.cpp54
-rw-r--r--src/video_core/engines/maxwell_dma.h13
-rw-r--r--src/video_core/engines/shader_bytecode.h200
-rw-r--r--src/video_core/gpu.cpp98
-rw-r--r--src/video_core/gpu.h64
-rw-r--r--src/video_core/gpu_asynch.cpp14
-rw-r--r--src/video_core/gpu_asynch.h8
-rw-r--r--src/video_core/gpu_synch.cpp7
-rw-r--r--src/video_core/gpu_synch.h9
-rw-r--r--src/video_core/gpu_thread.cpp35
-rw-r--r--src/video_core/gpu_thread.h35
-rw-r--r--src/video_core/macro_interpreter.cpp22
-rw-r--r--src/video_core/macro_interpreter.h8
-rw-r--r--src/video_core/memory_manager.cpp29
-rw-r--r--src/video_core/memory_manager.h8
-rw-r--r--src/video_core/morton.cpp116
-rw-r--r--src/video_core/morton.h3
-rw-r--r--src/video_core/rasterizer_interface.h16
-rw-r--r--src/video_core/renderer_base.h3
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp124
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h77
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp49
-rw-r--r--src/video_core/renderer_opengl/gl_device.h21
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.cpp75
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.h68
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp102
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h82
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp694
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h88
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp1362
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h572
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h28
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp356
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h70
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp1296
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h25
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp63
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h97
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp45
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp131
-rw-r--r--src/video_core/renderer_opengl/gl_state.h52
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h3
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp624
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h147
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp105
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
-rw-r--r--src/video_core/renderer_opengl/utils.cpp52
-rw-r--r--src/video_core/renderer_opengl/utils.h43
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp16
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h78
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp132
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h2
-rw-r--r--src/video_core/shader/control_flow.cpp481
-rw-r--r--src/video_core/shader/control_flow.h79
-rw-r--r--src/video_core/shader/decode.cpp179
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp13
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp4
-rw-r--r--src/video_core/shader/decode/conversion.cpp44
-rw-r--r--src/video_core/shader/decode/decode_integer_set.cpp0
-rw-r--r--src/video_core/shader/decode/ffma.cpp10
-rw-r--r--src/video_core/shader/decode/float_set.cpp1
-rw-r--r--src/video_core/shader/decode/float_set_predicate.cpp10
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp70
-rw-r--r--src/video_core/shader/decode/hfma2.cpp4
-rw-r--r--src/video_core/shader/decode/image.cpp164
-rw-r--r--src/video_core/shader/decode/integer_set.cpp1
-rw-r--r--src/video_core/shader/decode/integer_set_predicate.cpp1
-rw-r--r--src/video_core/shader/decode/memory.cpp37
-rw-r--r--src/video_core/shader/decode/other.cpp71
-rw-r--r--src/video_core/shader/decode/predicate_set_register.cpp1
-rw-r--r--src/video_core/shader/decode/shift.cpp19
-rw-r--r--src/video_core/shader/decode/texture.cpp87
-rw-r--r--src/video_core/shader/decode/warp.cpp55
-rw-r--r--src/video_core/shader/decode/xmad.cpp12
-rw-r--r--src/video_core/shader/node.h143
-rw-r--r--src/video_core/shader/node_helper.cpp2
-rw-r--r--src/video_core/shader/shader_ir.cpp141
-rw-r--r--src/video_core/shader/shader_ir.h79
-rw-r--r--src/video_core/shader/track.cpp37
-rw-r--r--src/video_core/surface.cpp13
-rw-r--r--src/video_core/surface.h225
-rw-r--r--src/video_core/texture_cache.cpp386
-rw-r--r--src/video_core/texture_cache.h586
-rw-r--r--src/video_core/texture_cache/copy_params.h36
-rw-r--r--src/video_core/texture_cache/surface_base.cpp302
-rw-r--r--src/video_core/texture_cache/surface_base.h325
-rw-r--r--src/video_core/texture_cache/surface_params.cpp389
-rw-r--r--src/video_core/texture_cache/surface_params.h286
-rw-r--r--src/video_core/texture_cache/surface_view.cpp23
-rw-r--r--src/video_core/texture_cache/surface_view.h67
-rw-r--r--src/video_core/texture_cache/texture_cache.h835
-rw-r--r--src/video_core/textures/convert.cpp14
-rw-r--r--src/video_core/textures/convert.h7
-rw-r--r--src/video_core/textures/decoders.cpp54
-rw-r--r--src/video_core/textures/decoders.h7
-rw-r--r--src/video_core/textures/texture.h31
-rw-r--r--src/yuzu/CMakeLists.txt55
-rw-r--r--src/yuzu/configuration/config.cpp51
-rw-r--r--src/yuzu/configuration/configure_debug.cpp6
-rw-r--r--src/yuzu/configuration/configure_debug.ui18
-rw-r--r--src/yuzu/configuration/configure_dialog.cpp44
-rw-r--r--src/yuzu/configuration/configure_gamelist.cpp2
-rw-r--r--src/yuzu/configuration/configure_general.cpp17
-rw-r--r--src/yuzu/configuration/configure_general.ui50
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp6
-rw-r--r--src/yuzu/configuration/configure_graphics.ui27
-rw-r--r--src/yuzu/configuration/configure_input.cpp12
-rw-r--r--src/yuzu/configuration/configure_input_player.cpp27
-rw-r--r--src/yuzu/configuration/configure_input_simple.cpp4
-rw-r--r--src/yuzu/configuration/configure_mouse_advanced.cpp6
-rw-r--r--src/yuzu/configuration/configure_per_general.cpp2
-rw-r--r--src/yuzu/configuration/configure_profile_manager.cpp8
-rw-r--r--src/yuzu/configuration/configure_touchscreen_advanced.cpp2
-rw-r--r--src/yuzu/configuration/configure_web.cpp2
-rw-r--r--src/yuzu/debugger/console.cpp2
-rw-r--r--src/yuzu/discord_impl.cpp2
-rw-r--r--src/yuzu/game_list.cpp438
-rw-r--r--src/yuzu/game_list.h43
-rw-r--r--src/yuzu/game_list_p.h129
-rw-r--r--src/yuzu/game_list_worker.cpp88
-rw-r--r--src/yuzu/game_list_worker.h26
-rw-r--r--src/yuzu/hotkeys.cpp2
-rw-r--r--src/yuzu/main.cpp137
-rw-r--r--src/yuzu/main.h11
-rw-r--r--src/yuzu/main.ui1
-rw-r--r--src/yuzu/uisettings.cpp (renamed from src/yuzu/ui_settings.cpp)2
-rw-r--r--src/yuzu/uisettings.h (renamed from src/yuzu/ui_settings.h)21
-rw-r--r--src/yuzu_cmd/config.cpp2
-rw-r--r--src/yuzu_cmd/default_ini.h7
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp4
-rw-r--r--src/yuzu_tester/config.cpp1
-rw-r--r--src/yuzu_tester/default_ini.h4
-rw-r--r--src/yuzu_tester/yuzu.cpp3
364 files changed, 14413 insertions, 6516 deletions
diff --git a/.ci/scripts/common/post-upload.sh b/.ci/scripts/common/post-upload.sh
new file mode 100644
index 000000000..bb4e9d328
--- /dev/null
+++ b/.ci/scripts/common/post-upload.sh
@@ -0,0 +1,15 @@
1#!/bin/bash -ex
2
3# Copy documentation
4cp license.txt "$REV_NAME"
5cp README.md "$REV_NAME"
6
7tar $COMPRESSION_FLAGS "$ARCHIVE_NAME" "$REV_NAME"
8
9mv "$REV_NAME" $RELEASE_NAME
10
117z a "$REV_NAME.7z" $RELEASE_NAME
12
13# move the compiled archive into the artifacts directory to be uploaded by travis releases
14mv "$ARCHIVE_NAME" artifacts/
15mv "$REV_NAME.7z" artifacts/
diff --git a/.ci/scripts/common/pre-upload.sh b/.ci/scripts/common/pre-upload.sh
new file mode 100644
index 000000000..3c2fc79a2
--- /dev/null
+++ b/.ci/scripts/common/pre-upload.sh
@@ -0,0 +1,6 @@
1#!/bin/bash -ex
2
3GITDATE="`git show -s --date=short --format='%ad' | sed 's/-//g'`"
4GITREV="`git show -s --format='%h'`"
5
6mkdir -p artifacts
diff --git a/.ci/scripts/format/docker.sh b/.ci/scripts/format/docker.sh
new file mode 100644
index 000000000..778411e4a
--- /dev/null
+++ b/.ci/scripts/format/docker.sh
@@ -0,0 +1,6 @@
1#!/bin/bash -ex
2
3# Run clang-format
4cd /yuzu
5chmod a+x ./.ci/scripts/format/script.sh
6./.ci/scripts/format/script.sh
diff --git a/.ci/scripts/format/exec.sh b/.ci/scripts/format/exec.sh
new file mode 100644
index 000000000..5d6393b38
--- /dev/null
+++ b/.ci/scripts/format/exec.sh
@@ -0,0 +1,4 @@
1#!/bin/bash -ex
2
3chmod a+x ./.ci/scripts/format/docker.sh
4docker run -v $(pwd):/yuzu yuzuemu/build-environments:linux-clang-format /bin/bash -ex /yuzu/.ci/scripts/format/docker.sh
diff --git a/.ci/scripts/format/script.sh b/.ci/scripts/format/script.sh
new file mode 100644
index 000000000..5ab828d5e
--- /dev/null
+++ b/.ci/scripts/format/script.sh
@@ -0,0 +1,37 @@
1#!/bin/bash -ex
2
3if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dist/*.desktop \
4 dist/*.svg dist/*.xml; then
5 echo Trailing whitespace found, aborting
6 exit 1
7fi
8
9# Default clang-format points to default 3.5 version one
10CLANG_FORMAT=clang-format-6.0
11$CLANG_FORMAT --version
12
13if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then
14 # Get list of every file modified in this pull request
15 files_to_lint="$(git diff --name-only --diff-filter=ACMRTUXB $TRAVIS_COMMIT_RANGE | grep '^src/[^.]*[.]\(cpp\|h\)$' || true)"
16else
17 # Check everything for branch pushes
18 files_to_lint="$(find src/ -name '*.cpp' -or -name '*.h')"
19fi
20
21# Turn off tracing for this because it's too verbose
22set +x
23
24for f in $files_to_lint; do
25 d=$(diff -u "$f" <($CLANG_FORMAT "$f") || true)
26 if ! [ -z "$d" ]; then
27 echo "!!! $f not compliant to coding style, here is the fix:"
28 echo "$d"
29 fail=1
30 fi
31done
32
33set -x
34
35if [ "$fail" = 1 ]; then
36 exit 1
37fi
diff --git a/.ci/scripts/linux/docker.sh b/.ci/scripts/linux/docker.sh
new file mode 100644
index 000000000..f538a4081
--- /dev/null
+++ b/.ci/scripts/linux/docker.sh
@@ -0,0 +1,14 @@
1#!/bin/bash -ex
2
3cd /yuzu
4
5ccache -s
6
7mkdir build || true && cd build
8cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
9
10ninja
11
12ccache -s
13
14ctest -VV -C Release
diff --git a/.ci/scripts/linux/exec.sh b/.ci/scripts/linux/exec.sh
new file mode 100644
index 000000000..a5a6c34b9
--- /dev/null
+++ b/.ci/scripts/linux/exec.sh
@@ -0,0 +1,5 @@
1#!/bin/bash -ex
2
3mkdir -p "ccache" || true
4chmod a+x ./.ci/scripts/linux/docker.sh
5docker run -e ENABLE_COMPATIBILITY_REPORTING -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.ci/scripts/linux/docker.sh
diff --git a/.ci/scripts/linux/upload.sh b/.ci/scripts/linux/upload.sh
new file mode 100644
index 000000000..0d131d1dd
--- /dev/null
+++ b/.ci/scripts/linux/upload.sh
@@ -0,0 +1,14 @@
1#!/bin/bash -ex
2
3. .ci/scripts/common/pre-upload.sh
4
5REV_NAME="yuzu-linux-${GITDATE}-${GITREV}"
6ARCHIVE_NAME="${REV_NAME}.tar.xz"
7COMPRESSION_FLAGS="-cJvf"
8
9mkdir "$REV_NAME"
10
11cp build/bin/yuzu-cmd "$REV_NAME"
12cp build/bin/yuzu "$REV_NAME"
13
14. .ci/scripts/common/post-upload.sh
diff --git a/.ci/scripts/merge/apply-patches-by-label.py b/.ci/scripts/merge/apply-patches-by-label.py
new file mode 100644
index 000000000..b346001a5
--- /dev/null
+++ b/.ci/scripts/merge/apply-patches-by-label.py
@@ -0,0 +1,28 @@
1# Download all pull requests as patches that match a specific label
2# Usage: python download-patches-by-label.py <Label to Match> <Root Path Folder to DL to>
3
4import requests, sys, json, urllib3.request, shutil, subprocess
5
6http = urllib3.PoolManager()
7dl_list = {}
8
9def check_individual(labels):
10 for label in labels:
11 if (label["name"] == sys.argv[1]):
12 return True
13 return False
14
15try:
16 url = 'https://api.github.com/repos/yuzu-emu/yuzu/pulls'
17 response = requests.get(url)
18 if (response.ok):
19 j = json.loads(response.content)
20 for pr in j:
21 if (check_individual(pr["labels"])):
22 pn = pr["number"]
23 print("Matched PR# %s" % pn)
24 print(subprocess.check_output(["git", "fetch", "https://github.com/yuzu-emu/yuzu.git", "pull/%s/head:pr-%s" % (pn, pn), "-f"]))
25 print(subprocess.check_output(["git", "merge", "--squash", "pr-%s" % pn]))
26 print(subprocess.check_output(["git", "commit", "-m\"Merge PR %s\"" % pn]))
27except:
28 sys.exit(-1)
diff --git a/.ci/scripts/merge/check-label-presence.py b/.ci/scripts/merge/check-label-presence.py
new file mode 100644
index 000000000..048466d7e
--- /dev/null
+++ b/.ci/scripts/merge/check-label-presence.py
@@ -0,0 +1,18 @@
1# Checks to see if the specified pull request # has the specified tag
2# Usage: python check-label-presence.py <Pull Request ID> <Name of Label>
3
4import requests, json, sys
5
6try:
7 url = 'https://api.github.com/repos/yuzu-emu/yuzu/issues/%s' % sys.argv[1]
8 response = requests.get(url)
9 if (response.ok):
10 j = json.loads(response.content)
11 for label in j["labels"]:
12 if label["name"] == sys.argv[2]:
13 print('##vso[task.setvariable variable=enabletesting;]true')
14 sys.exit()
15except:
16 sys.exit(-1)
17
18print('##vso[task.setvariable variable=enabletesting;]false')
diff --git a/.ci/scripts/merge/yuzubot-git-config.sh b/.ci/scripts/merge/yuzubot-git-config.sh
new file mode 100644
index 000000000..d9d595bbc
--- /dev/null
+++ b/.ci/scripts/merge/yuzubot-git-config.sh
@@ -0,0 +1,2 @@
1git config --global user.email "yuzu@yuzu-emu.org"
2git config --global user.name "yuzubot" \ No newline at end of file
diff --git a/.ci/scripts/windows/docker.sh b/.ci/scripts/windows/docker.sh
new file mode 100644
index 000000000..f7093363b
--- /dev/null
+++ b/.ci/scripts/windows/docker.sh
@@ -0,0 +1,50 @@
1#!/bin/bash -ex
2
3cd /yuzu
4
5ccache -s
6
7# Dirty hack to trick unicorn makefile into believing we are in a MINGW system
8mv /bin/uname /bin/uname1 && echo -e '#!/bin/sh\necho MINGW64' >> /bin/uname
9chmod +x /bin/uname
10
11# Dirty hack to trick unicorn makefile into believing we have cmd
12echo '' >> /bin/cmd
13chmod +x /bin/cmd
14
15mkdir build || true && cd build
16cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
17ninja
18
19# Clean up the dirty hacks
20rm /bin/uname && mv /bin/uname1 /bin/uname
21rm /bin/cmd
22
23ccache -s
24
25echo "Tests skipped"
26#ctest -VV -C Release
27
28echo 'Prepare binaries...'
29cd ..
30mkdir package
31
32QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/'
33find build/ -name "yuzu*.exe" -exec cp {} 'package' \;
34
35# copy Qt plugins
36mkdir package/platforms
37cp "${QT_PLATFORM_DLL_PATH}/qwindows.dll" package/platforms/
38cp -rv "${QT_PLATFORM_DLL_PATH}/../mediaservice/" package/
39cp -rv "${QT_PLATFORM_DLL_PATH}/../imageformats/" package/
40rm -f package/mediaservice/*d.dll
41
42for i in package/*.exe; do
43 # we need to process pdb here, however, cv2pdb
44 # does not work here, so we just simply strip all the debug symbols
45 x86_64-w64-mingw32-strip "${i}"
46done
47
48pip3 install pefile
49python3 .ci/scripts/windows/scan_dll.py package/*.exe "package/"
50python3 .ci/scripts/windows/scan_dll.py package/imageformats/*.dll "package/"
diff --git a/.ci/scripts/windows/exec.sh b/.ci/scripts/windows/exec.sh
new file mode 100644
index 000000000..d6a994856
--- /dev/null
+++ b/.ci/scripts/windows/exec.sh
@@ -0,0 +1,5 @@
1#!/bin/bash -ex
2
3mkdir -p "ccache" || true
4chmod a+x ./.ci/scripts/windows/docker.sh
5docker run -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.ci/scripts/windows/docker.sh
diff --git a/.ci/scripts/windows/scan_dll.py b/.ci/scripts/windows/scan_dll.py
new file mode 100644
index 000000000..163183f2e
--- /dev/null
+++ b/.ci/scripts/windows/scan_dll.py
@@ -0,0 +1,106 @@
1import pefile
2import sys
3import re
4import os
5import queue
6import shutil
7
8# constant definitions
9KNOWN_SYS_DLLS = ['WINMM.DLL', 'MSVCRT.DLL', 'VERSION.DLL', 'MPR.DLL',
10 'DWMAPI.DLL', 'UXTHEME.DLL', 'DNSAPI.DLL', 'IPHLPAPI.DLL']
11# below is for Ubuntu 18.04 with specified PPA enabled, if you are using
12# other distro or different repositories, change the following accordingly
13DLL_PATH = [
14 '/usr/x86_64-w64-mingw32/bin/',
15 '/usr/x86_64-w64-mingw32/lib/',
16 '/usr/lib/gcc/x86_64-w64-mingw32/7.3-posix/'
17]
18
19missing = []
20
21
22def parse_imports(file_name):
23 results = []
24 pe = pefile.PE(file_name, fast_load=True)
25 pe.parse_data_directories()
26
27 for entry in pe.DIRECTORY_ENTRY_IMPORT:
28 current = entry.dll.decode()
29 current_u = current.upper() # b/c Windows is often case insensitive
30 # here we filter out system dlls
31 # dll w/ names like *32.dll are likely to be system dlls
32 if current_u.upper() not in KNOWN_SYS_DLLS and not re.match(string=current_u, pattern=r'.*32\.DLL'):
33 results.append(current)
34
35 return results
36
37
38def parse_imports_recursive(file_name, path_list=[]):
39 q = queue.Queue() # create a FIFO queue
40 # file_name can be a string or a list for the convience
41 if isinstance(file_name, str):
42 q.put(file_name)
43 elif isinstance(file_name, list):
44 for i in file_name:
45 q.put(i)
46 full_list = []
47 while q.qsize():
48 current = q.get_nowait()
49 print('> %s' % current)
50 deps = parse_imports(current)
51 # if this dll does not have any import, ignore it
52 if not deps:
53 continue
54 for dep in deps:
55 # the dependency already included in the list, skip
56 if dep in full_list:
57 continue
58 # find the requested dll in the provided paths
59 full_path = find_dll(dep)
60 if not full_path:
61 missing.append(dep)
62 continue
63 full_list.append(dep)
64 q.put(full_path)
65 path_list.append(full_path)
66 return full_list
67
68
69def find_dll(name):
70 for path in DLL_PATH:
71 for root, _, files in os.walk(path):
72 for f in files:
73 if name.lower() == f.lower():
74 return os.path.join(root, f)
75
76
77def deploy(name, dst, dry_run=False):
78 dlls_path = []
79 parse_imports_recursive(name, dlls_path)
80 for dll_entry in dlls_path:
81 if not dry_run:
82 shutil.copy(dll_entry, dst)
83 else:
84 print('[Dry-Run] Copy %s to %s' % (dll_entry, dst))
85 print('Deploy completed.')
86 return dlls_path
87
88
89def main():
90 if len(sys.argv) < 3:
91 print('Usage: %s [files to examine ...] [target deploy directory]')
92 return 1
93 to_deploy = sys.argv[1:-1]
94 tgt_dir = sys.argv[-1]
95 if not os.path.isdir(tgt_dir):
96 print('%s is not a directory.' % tgt_dir)
97 return 1
98 print('Scanning dependencies...')
99 deploy(to_deploy, tgt_dir)
100 if missing:
101 print('Following DLLs are not found: %s' % ('\n'.join(missing)))
102 return 0
103
104
105if __name__ == '__main__':
106 main()
diff --git a/.ci/scripts/windows/upload.sh b/.ci/scripts/windows/upload.sh
new file mode 100644
index 000000000..de73d3541
--- /dev/null
+++ b/.ci/scripts/windows/upload.sh
@@ -0,0 +1,13 @@
1#!/bin/bash -ex
2
3. .ci/scripts/common/pre-upload.sh
4
5REV_NAME="yuzu-windows-mingw-${GITDATE}-${GITREV}"
6ARCHIVE_NAME="${REV_NAME}.tar.gz"
7COMPRESSION_FLAGS="-czvf"
8
9mkdir "$REV_NAME"
10# get around the permission issues
11cp -r package/* "$REV_NAME"
12
13. .ci/scripts/common/post-upload.sh
diff --git a/.ci/templates/build-single.yml b/.ci/templates/build-single.yml
new file mode 100644
index 000000000..357731eb9
--- /dev/null
+++ b/.ci/templates/build-single.yml
@@ -0,0 +1,23 @@
1parameters:
2 artifactSource: 'true'
3 cache: 'false'
4
5steps:
6- task: DockerInstaller@0
7 displayName: 'Prepare Environment'
8 inputs:
9 dockerVersion: '17.09.0-ce'
10- ${{ if eq(parameters.cache, 'true') }}:
11 - task: CacheBeta@0
12 displayName: 'Cache Build System'
13 inputs:
14 key: yuzu-v1-$(BuildName)-$(BuildSuffix)-$(CacheSuffix)
15 path: $(System.DefaultWorkingDirectory)/ccache
16 cacheHitVar: CACHE_RESTORED
17- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/exec.sh && ./.ci/scripts/$(ScriptFolder)/exec.sh
18 displayName: 'Build'
19- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/upload.sh && RELEASE_NAME=$(BuildName) ./.ci/scripts/$(ScriptFolder)/upload.sh
20 displayName: 'Package Artifacts'
21- publish: artifacts
22 artifact: 'yuzu-$(BuildName)-$(BuildSuffix)'
23 displayName: 'Upload Artifacts'
diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml
new file mode 100644
index 000000000..aa180894e
--- /dev/null
+++ b/.ci/templates/build-standard.yml
@@ -0,0 +1,23 @@
1jobs:
2- job: build
3 displayName: 'standard'
4 pool:
5 vmImage: ubuntu-latest
6 strategy:
7 maxParallel: 10
8 matrix:
9 windows:
10 BuildSuffix: 'windows-mingw'
11 ScriptFolder: 'windows'
12 linux:
13 BuildSuffix: 'linux'
14 ScriptFolder: 'linux'
15 steps:
16 - template: ./sync-source.yml
17 parameters:
18 artifactSource: $(parameters.artifactSource)
19 needSubmodules: 'true'
20 - template: ./build-single.yml
21 parameters:
22 artifactSource: 'false'
23 cache: $(parameters.cache) \ No newline at end of file
diff --git a/.ci/templates/build-testing.yml b/.ci/templates/build-testing.yml
new file mode 100644
index 000000000..4c9625944
--- /dev/null
+++ b/.ci/templates/build-testing.yml
@@ -0,0 +1,33 @@
1jobs:
2- job: build_test
3 displayName: 'testing'
4 pool:
5 vmImage: ubuntu-latest
6 strategy:
7 maxParallel: 5
8 matrix:
9 windows:
10 BuildSuffix: 'windows-testing'
11 ScriptFolder: 'windows'
12 steps:
13 - script: sudo apt-get update && sudo apt-get --only-upgrade -y install python3-pip && pip install requests urllib3
14 displayName: 'Prepare Environment'
15 - task: PythonScript@0
16 condition: eq(variables['Build.Reason'], 'PullRequest')
17 displayName: 'Determine Testing Status'
18 inputs:
19 scriptSource: 'filePath'
20 scriptPath: '.ci/scripts/merge/check-label-presence.py'
21 arguments: '$(System.PullRequest.PullRequestNumber) create-testing-build'
22 - ${{ if eq(variables.enabletesting, 'true') }}:
23 - template: ./sync-source.yml
24 parameters:
25 artifactSource: $(parameters.artifactSource)
26 needSubmodules: 'true'
27 - template: ./mergebot.yml
28 parameters:
29 matchLabel: 'testing-merge'
30 - template: ./build-single.yml
31 parameters:
32 artifactSource: 'false'
33 cache: 'false'
diff --git a/.ci/templates/format-check.yml b/.ci/templates/format-check.yml
new file mode 100644
index 000000000..5061f1cb8
--- /dev/null
+++ b/.ci/templates/format-check.yml
@@ -0,0 +1,14 @@
1parameters:
2 artifactSource: 'true'
3
4steps:
5- template: ./sync-source.yml
6 parameters:
7 artifactSource: $(parameters.artifactSource)
8 needSubmodules: 'false'
9- task: DockerInstaller@0
10 displayName: 'Prepare Environment'
11 inputs:
12 dockerVersion: '17.09.0-ce'
13- script: chmod a+x ./.ci/scripts/format/exec.sh && ./.ci/scripts/format/exec.sh
14 displayName: 'Verify Formatting'
diff --git a/.ci/templates/merge.yml b/.ci/templates/merge.yml
new file mode 100644
index 000000000..efc82778a
--- /dev/null
+++ b/.ci/templates/merge.yml
@@ -0,0 +1,46 @@
1jobs:
2- job: merge
3 displayName: 'pull requests'
4 steps:
5 - checkout: self
6 submodules: recursive
7 - template: ./mergebot.yml
8 parameters:
9 matchLabel: '$(BuildName)-merge'
10 - task: ArchiveFiles@2
11 displayName: 'Package Source'
12 inputs:
13 rootFolderOrFile: '$(System.DefaultWorkingDirectory)'
14 includeRootFolder: false
15 archiveType: '7z'
16 archiveFile: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z'
17 - task: PublishPipelineArtifact@1
18 displayName: 'Upload Artifacts'
19 inputs:
20 targetPath: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z'
21 artifact: 'yuzu-$(BuildName)-source'
22 replaceExistingArchive: true
23- job: upload_source
24 displayName: 'upload'
25 dependsOn: merge
26 steps:
27 - template: ./sync-source.yml
28 parameters:
29 artifactSource: 'true'
30 needSubmodules: 'true'
31 - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh
32 displayName: 'Apply Git Configuration'
33 - script: git tag -a $(BuildName)-$(Build.BuildId) -m "yuzu $(BuildName) $(Build.BuildNumber) $(Build.DefinitionName)"
34 displayName: 'Tag Source'
35 - script: git remote add other $(GitRepoPushChangesURL)
36 displayName: 'Register Repository'
37 - script: git push --follow-tags --force other HEAD:$(GitPushBranch)
38 displayName: 'Update Code'
39 - script: git rev-list -n 1 $(BuildName)-$(Build.BuildId) > $(Build.ArtifactStagingDirectory)/tag-commit.sha
40 displayName: 'Calculate Release Point'
41 - task: PublishPipelineArtifact@1
42 displayName: 'Upload Release Point'
43 inputs:
44 targetPath: '$(Build.ArtifactStagingDirectory)/tag-commit.sha'
45 artifact: 'yuzu-$(BuildName)-release-point'
46 replaceExistingArchive: true \ No newline at end of file
diff --git a/.ci/templates/mergebot.yml b/.ci/templates/mergebot.yml
new file mode 100644
index 000000000..5211efcc6
--- /dev/null
+++ b/.ci/templates/mergebot.yml
@@ -0,0 +1,15 @@
1parameters:
2 matchLabel: 'dummy-merge'
3
4steps:
5 - script: mkdir $(System.DefaultWorkingDirectory)/patches && pip install requests urllib3
6 displayName: 'Prepare Environment'
7 - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh
8 displayName: 'Apply Git Configuration'
9 - task: PythonScript@0
10 displayName: 'Discover, Download, and Apply Patches'
11 inputs:
12 scriptSource: 'filePath'
13 scriptPath: '.ci/scripts/merge/apply-patches-by-label.py'
14 arguments: '${{ parameters.matchLabel }} patches'
15 workingDirectory: '$(System.DefaultWorkingDirectory)'
diff --git a/.ci/templates/retrieve-artifact-source.yml b/.ci/templates/retrieve-artifact-source.yml
new file mode 100644
index 000000000..47d217e7b
--- /dev/null
+++ b/.ci/templates/retrieve-artifact-source.yml
@@ -0,0 +1,16 @@
1steps:
2- checkout: none
3- task: DownloadPipelineArtifact@2
4 displayName: 'Download Source'
5 inputs:
6 artifactName: 'yuzu-$(BuildName)-source'
7 buildType: 'current'
8 targetPath: '$(Build.ArtifactStagingDirectory)'
9- script: rm -rf $(System.DefaultWorkingDirectory) && mkdir $(System.DefaultWorkingDirectory)
10 displayName: 'Clean Working Directory'
11- task: ExtractFiles@1
12 displayName: 'Prepare Source'
13 inputs:
14 archiveFilePatterns: '$(Build.ArtifactStagingDirectory)/*.7z'
15 destinationFolder: '$(System.DefaultWorkingDirectory)'
16 cleanDestinationFolder: false \ No newline at end of file
diff --git a/.ci/templates/retrieve-master-source.yml b/.ci/templates/retrieve-master-source.yml
new file mode 100644
index 000000000..a08a3f926
--- /dev/null
+++ b/.ci/templates/retrieve-master-source.yml
@@ -0,0 +1,11 @@
1parameters:
2 needSubmodules: 'true'
3
4steps:
5- checkout: self
6 displayName: 'Checkout Recursive'
7 submodules: recursive
8# condition: eq(parameters.needSubmodules, 'true')
9#- checkout: self
10# displayName: 'Checkout Fast'
11# condition: ne(parameters.needSubmodules, 'true')
diff --git a/.ci/templates/sync-source.yml b/.ci/templates/sync-source.yml
new file mode 100644
index 000000000..409e1cd83
--- /dev/null
+++ b/.ci/templates/sync-source.yml
@@ -0,0 +1,7 @@
1steps:
2- ${{ if eq(parameters.artifactSource, 'true') }}:
3 - template: ./retrieve-artifact-source.yml
4- ${{ if ne(parameters.artifactSource, 'true') }}:
5 - template: ./retrieve-master-source.yml
6 parameters:
7 needSubmodules: $(parameters.needSubmodules) \ No newline at end of file
diff --git a/.ci/yuzu-mainline.yml b/.ci/yuzu-mainline.yml
new file mode 100644
index 000000000..2930a8564
--- /dev/null
+++ b/.ci/yuzu-mainline.yml
@@ -0,0 +1,25 @@
1trigger:
2- master
3
4stages:
5- stage: merge
6 displayName: 'merge'
7 jobs:
8 - template: ./templates/merge.yml
9- stage: format
10 dependsOn: merge
11 displayName: 'format'
12 jobs:
13 - job: format
14 displayName: 'clang'
15 pool:
16 vmImage: ubuntu-latest
17 steps:
18 - template: ./templates/format-check.yml
19- stage: build
20 displayName: 'build'
21 dependsOn: format
22 jobs:
23 - template: ./templates/build-standard.yml
24 parameters:
25 cache: 'true'
diff --git a/.ci/yuzu-patreon.yml b/.ci/yuzu-patreon.yml
new file mode 100644
index 000000000..aa912913d
--- /dev/null
+++ b/.ci/yuzu-patreon.yml
@@ -0,0 +1,19 @@
1# Starter pipeline
2# Start with a minimal pipeline that you can customize to build and deploy your code.
3# Add steps that build, run tests, deploy, and more:
4# https://aka.ms/yaml
5
6trigger:
7- master
8
9pool:
10 vmImage: 'ubuntu-latest'
11
12steps:
13- script: echo Hello, world!
14 displayName: 'Run a one-line script'
15
16- script: |
17 echo Add other tasks to build, test, and deploy your project.
18 echo See https://aka.ms/yaml
19 displayName: 'Run a multi-line script'
diff --git a/.ci/yuzu-repo-sync.yml b/.ci/yuzu-repo-sync.yml
new file mode 100644
index 000000000..602e298a6
--- /dev/null
+++ b/.ci/yuzu-repo-sync.yml
@@ -0,0 +1,19 @@
1trigger:
2- master
3
4jobs:
5- job: copy
6 displayName: 'Sync Repository'
7 pool:
8 vmImage: 'ubuntu-latest'
9 steps:
10 - script: echo 'https://$(GitUsername):$(GitAccessToken)@dev.azure.com' > $HOME/.git-credentials
11 displayName: 'Load Credentials'
12 - script: git config --global credential.helper store
13 displayName: 'Register Credential Helper'
14 - script: git remote add other $(GitRepoPushChangesURL)
15 displayName: 'Register Repository'
16 - script: git push --force other HEAD:$(GitPushBranch)
17 displayName: 'Update Code'
18 - script: rm -rf $HOME/.git-credentials
19 displayName: 'Clear Cached Credentials'
diff --git a/.ci/yuzu-verify.yml b/.ci/yuzu-verify.yml
new file mode 100644
index 000000000..5492e696a
--- /dev/null
+++ b/.ci/yuzu-verify.yml
@@ -0,0 +1,20 @@
1stages:
2- stage: format
3 displayName: 'format'
4 jobs:
5 - job: format
6 displayName: 'clang'
7 pool:
8 vmImage: ubuntu-latest
9 steps:
10 - template: ./templates/format-check.yml
11 parameters:
12 artifactSource: 'false'
13- stage: build
14 displayName: 'build'
15 dependsOn: format
16 jobs:
17 - template: ./templates/build-standard.yml
18 parameters:
19 cache: 'false'
20 - template: ./templates/build-testing.yml
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 31edeb63d..a1ace89cb 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -70,6 +70,7 @@ set(HASH_FILES
70 "${VIDEO_CORE}/shader/decode/half_set.cpp" 70 "${VIDEO_CORE}/shader/decode/half_set.cpp"
71 "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" 71 "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
72 "${VIDEO_CORE}/shader/decode/hfma2.cpp" 72 "${VIDEO_CORE}/shader/decode/hfma2.cpp"
73 "${VIDEO_CORE}/shader/decode/image.cpp"
73 "${VIDEO_CORE}/shader/decode/integer_set.cpp" 74 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
74 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" 75 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
75 "${VIDEO_CORE}/shader/decode/memory.cpp" 76 "${VIDEO_CORE}/shader/decode/memory.cpp"
@@ -80,7 +81,10 @@ set(HASH_FILES
80 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" 81 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
81 "${VIDEO_CORE}/shader/decode/shift.cpp" 82 "${VIDEO_CORE}/shader/decode/shift.cpp"
82 "${VIDEO_CORE}/shader/decode/video.cpp" 83 "${VIDEO_CORE}/shader/decode/video.cpp"
84 "${VIDEO_CORE}/shader/decode/warp.cpp"
83 "${VIDEO_CORE}/shader/decode/xmad.cpp" 85 "${VIDEO_CORE}/shader/decode/xmad.cpp"
86 "${VIDEO_CORE}/shader/control_flow.cpp"
87 "${VIDEO_CORE}/shader/control_flow.h"
84 "${VIDEO_CORE}/shader/decode.cpp" 88 "${VIDEO_CORE}/shader/decode.cpp"
85 "${VIDEO_CORE}/shader/node.h" 89 "${VIDEO_CORE}/shader/node.h"
86 "${VIDEO_CORE}/shader/node_helper.cpp" 90 "${VIDEO_CORE}/shader/node_helper.cpp"
diff --git a/README.md b/README.md
index 4b1ea7d7c..430c6dd65 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@ yuzu emulator
2============= 2=============
3[![Travis CI Build Status](https://travis-ci.org/yuzu-emu/yuzu.svg?branch=master)](https://travis-ci.org/yuzu-emu/yuzu) 3[![Travis CI Build Status](https://travis-ci.org/yuzu-emu/yuzu.svg?branch=master)](https://travis-ci.org/yuzu-emu/yuzu)
4[![AppVeyor CI Build Status](https://ci.appveyor.com/api/projects/status/77k97svb2usreu68?svg=true)](https://ci.appveyor.com/project/bunnei/yuzu) 4[![AppVeyor CI Build Status](https://ci.appveyor.com/api/projects/status/77k97svb2usreu68?svg=true)](https://ci.appveyor.com/project/bunnei/yuzu)
5[![Azure Mainline CI Build Status](https://dev.azure.com/yuzu-emu/yuzu/_apis/build/status/yuzu%20mainline?branchName=master)](https://dev.azure.com/yuzu-emu/yuzu/)
5 6
6yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/). 7yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/).
7 8
diff --git a/dist/license.md b/dist/license.md
new file mode 100644
index 000000000..b777ebb20
--- /dev/null
+++ b/dist/license.md
@@ -0,0 +1,31 @@
1The icons in this folder and its subfolders have the following licenses:
2
3Icon Name | License | Origin/Author
4--- | --- | ---
5qt_themes/default/icons/16x16/checked.png | Free for non-commercial use
6qt_themes/default/icons/16x16/failed.png | Free for non-commercial use
7qt_themes/default/icons/16x16/lock.png | CC BY-ND 3.0 | https://icons8.com
8qt_themes/default/icons/256x256/plus_folder.png | CC BY-ND 3.0 | https://icons8.com
9qt_themes/default/icons/48x48/bad_folder.png | CC BY-ND 3.0 | https://icons8.com
10qt_themes/default/icons/48x48/chip.png | CC BY-ND 3.0 | https://icons8.com
11qt_themes/default/icons/48x48/folder.png | CC BY-ND 3.0 | https://icons8.com
12qt_themes/default/icons/48x48/plus.png | CC0 1.0 | Designed by BreadFish64 from the Citra team
13qt_themes/default/icons/48x48/sd_card.png | CC BY-ND 3.0 | https://icons8.com
14qt_themes/qdarkstyle/icons/16x16/checked.png | Free for non-commercial use
15qt_themes/qdarkstyle/icons/16x16/failed.png | Free for non-commercial use
16qt_themes/qdarkstyle/icons/16x16/lock.png | CC BY-ND 3.0 | https://icons8.com
17qt_themes/qdarkstyle/icons/256x256/plus_folder.png | CC BY-ND 3.0 | https://icons8.com
18qt_themes/qdarkstyle/icons/48x48/bad_folder.png | CC BY-ND 3.0 | https://icons8.com
19qt_themes/qdarkstyle/icons/48x48/chip.png | CC BY-ND 3.0 | https://icons8.com
20qt_themes/qdarkstyle/icons/48x48/folder.png | CC BY-ND 3.0 | https://icons8.com
21qt_themes/qdarkstyle/icons/48x48/plus.png | CC0 1.0 | Designed by BreadFish64 from the Citra team
22qt_themes/qdarkstyle/icons/48x48/sd_card.png | CC BY-ND 3.0 | https://icons8.com
23qt_themes/colorful/icons/16x16/lock.png | CC BY-ND 3.0 | https://icons8.com
24qt_themes/colorful/icons/256x256/plus_folder.png | CC BY-ND 3.0 | https://icons8.com
25qt_themes/colorful/icons/48x48/bad_folder.png | CC BY-ND 3.0 | https://icons8.com
26qt_themes/colorful/icons/48x48/chip.png | CC BY-ND 3.0 | https://icons8.com
27qt_themes/colorful/icons/48x48/folder.png | CC BY-ND 3.0 | https://icons8.com
28qt_themes/colorful/icons/48x48/plus.png | CC BY-ND 3.0 | https://icons8.com
29qt_themes/colorful/icons/48x48/sd_card.png | CC BY-ND 3.0 | https://icons8.com
30
31<!-- TODO: Add the license of the yuzu icon --> \ No newline at end of file
diff --git a/dist/qt_themes/colorful/icons/16x16/lock.png b/dist/qt_themes/colorful/icons/16x16/lock.png
new file mode 100644
index 000000000..fd27069d8
--- /dev/null
+++ b/dist/qt_themes/colorful/icons/16x16/lock.png
Binary files differ
diff --git a/dist/qt_themes/colorful/icons/256x256/plus_folder.png b/dist/qt_themes/colorful/icons/256x256/plus_folder.png
new file mode 100644
index 000000000..760fe6245
--- /dev/null
+++ b/dist/qt_themes/colorful/icons/256x256/plus_folder.png
Binary files differ
diff --git a/dist/qt_themes/colorful/icons/48x48/bad_folder.png b/dist/qt_themes/colorful/icons/48x48/bad_folder.png
new file mode 100644
index 000000000..a7ab7a1f6
--- /dev/null
+++ b/dist/qt_themes/colorful/icons/48x48/bad_folder.png
Binary files differ
diff --git a/dist/qt_themes/colorful/icons/48x48/chip.png b/dist/qt_themes/colorful/icons/48x48/chip.png
new file mode 100644
index 000000000..6fa158999
--- /dev/null
+++ b/dist/qt_themes/colorful/icons/48x48/chip.png
Binary files differ
diff --git a/dist/qt_themes/colorful/icons/48x48/folder.png b/dist/qt_themes/colorful/icons/48x48/folder.png
new file mode 100644
index 000000000..498de4c62
--- /dev/null
+++ b/dist/qt_themes/colorful/icons/48x48/folder.png
Binary files differ
diff --git a/dist/qt_themes/colorful/icons/48x48/plus.png b/dist/qt_themes/colorful/icons/48x48/plus.png
new file mode 100644
index 000000000..bc2c47c91
--- /dev/null
+++ b/dist/qt_themes/colorful/icons/48x48/plus.png
Binary files differ
diff --git a/dist/qt_themes/colorful/icons/48x48/sd_card.png b/dist/qt_themes/colorful/icons/48x48/sd_card.png
new file mode 100644
index 000000000..29be71a0d
--- /dev/null
+++ b/dist/qt_themes/colorful/icons/48x48/sd_card.png
Binary files differ
diff --git a/dist/qt_themes/colorful/icons/index.theme b/dist/qt_themes/colorful/icons/index.theme
new file mode 100644
index 000000000..b452aca16
--- /dev/null
+++ b/dist/qt_themes/colorful/icons/index.theme
@@ -0,0 +1,14 @@
1[Icon Theme]
2Name=colorful
3Comment=Colorful theme
4Inherits=default
5Directories=16x16,48x48,256x256
6
7[16x16]
8Size=16
9
10[48x48]
11Size=48
12
13[256x256]
14Size=256
diff --git a/dist/qt_themes/colorful/style.qrc b/dist/qt_themes/colorful/style.qrc
new file mode 100644
index 000000000..af2f3fd56
--- /dev/null
+++ b/dist/qt_themes/colorful/style.qrc
@@ -0,0 +1,15 @@
1<RCC>
2 <qresource prefix="icons/colorful">
3 <file alias="index.theme">icons/index.theme</file>
4 <file alias="16x16/lock.png">icons/16x16/lock.png</file>
5 <file alias="48x48/bad_folder.png">icons/48x48/bad_folder.png</file>
6 <file alias="48x48/chip.png">icons/48x48/chip.png</file>
7 <file alias="48x48/folder.png">icons/48x48/folder.png</file>
8 <file alias="48x48/plus.png">icons/48x48/plus.png</file>
9 <file alias="48x48/sd_card.png">icons/48x48/sd_card.png</file>
10 <file alias="256x256/plus_folder.png">icons/256x256/plus_folder.png</file>
11 </qresource>
12 <qresource prefix="colorful">
13 <file>style.qss</file>
14 </qresource>
15</RCC>
diff --git a/dist/qt_themes/colorful/style.qss b/dist/qt_themes/colorful/style.qss
new file mode 100644
index 000000000..413fc81da
--- /dev/null
+++ b/dist/qt_themes/colorful/style.qss
@@ -0,0 +1,4 @@
1/*
2 This file is intentionally left blank.
3 We do not want to apply any stylesheet for colorful, only icons.
4*/
diff --git a/dist/qt_themes/colorful_dark/icons/16x16/lock.png b/dist/qt_themes/colorful_dark/icons/16x16/lock.png
new file mode 100644
index 000000000..32c505848
--- /dev/null
+++ b/dist/qt_themes/colorful_dark/icons/16x16/lock.png
Binary files differ
diff --git a/dist/qt_themes/colorful_dark/icons/index.theme b/dist/qt_themes/colorful_dark/icons/index.theme
new file mode 100644
index 000000000..94d5ae8aa
--- /dev/null
+++ b/dist/qt_themes/colorful_dark/icons/index.theme
@@ -0,0 +1,8 @@
1[Icon Theme]
2Name=colorful_dark
3Comment=Colorful theme (Dark style)
4Inherits=default
5Directories=16x16
6
7[16x16]
8Size=16
diff --git a/dist/qt_themes/colorful_dark/style.qrc b/dist/qt_themes/colorful_dark/style.qrc
new file mode 100644
index 000000000..27a6cc87d
--- /dev/null
+++ b/dist/qt_themes/colorful_dark/style.qrc
@@ -0,0 +1,57 @@
1<RCC>
2 <qresource prefix="icons/colorful_dark">
3 <file alias="index.theme">icons/index.theme</file>
4 <file alias="16x16/lock.png">icons/16x16/lock.png</file>
5 <file alias="48x48/bad_folder.png">../colorful/icons/48x48/bad_folder.png</file>
6 <file alias="48x48/chip.png">../colorful/icons/48x48/chip.png</file>
7 <file alias="48x48/folder.png">../colorful/icons/48x48/folder.png</file>
8 <file alias="48x48/plus.png">../colorful/icons/48x48/plus.png</file>
9 <file alias="48x48/sd_card.png">../colorful/icons/48x48/sd_card.png</file>
10 <file alias="256x256/plus_folder.png">../colorful/icons/256x256/plus_folder.png</file>
11 </qresource>
12
13 <qresource prefix="qss_icons">
14 <file alias="rc/up_arrow_disabled.png">../qdarkstyle/rc/up_arrow_disabled.png</file>
15 <file alias="rc/Hmovetoolbar.png">../qdarkstyle/rc/Hmovetoolbar.png</file>
16 <file alias="rc/stylesheet-branch-end.png">../qdarkstyle/rc/stylesheet-branch-end.png</file>
17 <file alias="rc/branch_closed-on.png">../qdarkstyle/rc/branch_closed-on.png</file>
18 <file alias="rc/stylesheet-vline.png">../qdarkstyle/rc/stylesheet-vline.png</file>
19 <file alias="rc/branch_closed.png">../qdarkstyle/rc/branch_closed.png</file>
20 <file alias="rc/branch_open-on.png">../qdarkstyle/rc/branch_open-on.png</file>
21 <file alias="rc/transparent.png">../qdarkstyle/rc/transparent.png</file>
22 <file alias="rc/right_arrow_disabled.png">../qdarkstyle/rc/right_arrow_disabled.png</file>
23 <file alias="rc/sizegrip.png">../qdarkstyle/rc/sizegrip.png</file>
24 <file alias="rc/close.png">../qdarkstyle/rc/close.png</file>
25 <file alias="rc/close-hover.png">../qdarkstyle/rc/close-hover.png</file>
26 <file alias="rc/close-pressed.png">../qdarkstyle/rc/close-pressed.png</file>
27 <file alias="rc/down_arrow.png">../qdarkstyle/rc/down_arrow.png</file>
28 <file alias="rc/Vmovetoolbar.png">../qdarkstyle/rc/Vmovetoolbar.png</file>
29 <file alias="rc/left_arrow.png">../qdarkstyle/rc/left_arrow.png</file>
30 <file alias="rc/stylesheet-branch-more.png">../qdarkstyle/rc/stylesheet-branch-more.png</file>
31 <file alias="rc/up_arrow.png">../qdarkstyle/rc/up_arrow.png</file>
32 <file alias="rc/right_arrow.png">../qdarkstyle/rc/right_arrow.png</file>
33 <file alias="rc/left_arrow_disabled.png">../qdarkstyle/rc/left_arrow_disabled.png</file>
34 <file alias="rc/Hsepartoolbar.png">../qdarkstyle/rc/Hsepartoolbar.png</file>
35 <file alias="rc/branch_open.png">../qdarkstyle/rc/branch_open.png</file>
36 <file alias="rc/Vsepartoolbar.png">../qdarkstyle/rc/Vsepartoolbar.png</file>
37 <file alias="rc/down_arrow_disabled.png">../qdarkstyle/rc/down_arrow_disabled.png</file>
38 <file alias="rc/undock.png">../qdarkstyle/rc/undock.png</file>
39 <file alias="rc/checkbox_checked_disabled.png">../qdarkstyle/rc/checkbox_checked_disabled.png</file>
40 <file alias="rc/checkbox_checked_focus.png">../qdarkstyle/rc/checkbox_checked_focus.png</file>
41 <file alias="rc/checkbox_checked.png">../qdarkstyle/rc/checkbox_checked.png</file>
42 <file alias="rc/checkbox_indeterminate.png">../qdarkstyle/rc/checkbox_indeterminate.png</file>
43 <file alias="rc/checkbox_indeterminate_focus.png">../qdarkstyle/rc/checkbox_indeterminate_focus.png</file>
44 <file alias="rc/checkbox_unchecked_disabled.png">../qdarkstyle/rc/checkbox_unchecked_disabled.png</file>
45 <file alias="rc/checkbox_unchecked_focus.png">../qdarkstyle/rc/checkbox_unchecked_focus.png</file>
46 <file alias="rc/checkbox_unchecked.png">../qdarkstyle/rc/checkbox_unchecked.png</file>
47 <file alias="rc/radio_checked_disabled.png">../qdarkstyle/rc/radio_checked_disabled.png</file>
48 <file alias="rc/radio_checked_focus.png">../qdarkstyle/rc/radio_checked_focus.png</file>
49 <file alias="rc/radio_checked.png">../qdarkstyle/rc/radio_checked.png</file>
50 <file alias="rc/radio_unchecked_disabled.png">../qdarkstyle/rc/radio_unchecked_disabled.png</file>
51 <file alias="rc/radio_unchecked_focus.png">../qdarkstyle/rc/radio_unchecked_focus.png</file>
52 <file alias="rc/radio_unchecked.png">../qdarkstyle/rc/radio_unchecked.png</file>
53 </qresource>
54 <qresource prefix="colorful_dark">
55 <file alias="style.qss">../qdarkstyle/style.qss</file>
56 </qresource>
57</RCC>
diff --git a/dist/qt_themes/default/default.qrc b/dist/qt_themes/default/default.qrc
index 14a0cf6f9..d1a0ee1be 100644
--- a/dist/qt_themes/default/default.qrc
+++ b/dist/qt_themes/default/default.qrc
@@ -5,7 +5,21 @@
5 <file alias="16x16/checked.png">icons/16x16/checked.png</file> 5 <file alias="16x16/checked.png">icons/16x16/checked.png</file>
6 6
7 <file alias="16x16/failed.png">icons/16x16/failed.png</file> 7 <file alias="16x16/failed.png">icons/16x16/failed.png</file>
8
9 <file alias="16x16/lock.png">icons/16x16/lock.png</file>
10
11 <file alias="48x48/bad_folder.png">icons/48x48/bad_folder.png</file>
12
13 <file alias="48x48/chip.png">icons/48x48/chip.png</file>
14
15 <file alias="48x48/folder.png">icons/48x48/folder.png</file>
16
17 <file alias="48x48/plus.png">icons/48x48/plus.png</file>
18
19 <file alias="48x48/sd_card.png">icons/48x48/sd_card.png</file>
8 20
9 <file alias="256x256/yuzu.png">icons/256x256/yuzu.png</file> 21 <file alias="256x256/yuzu.png">icons/256x256/yuzu.png</file>
22
23 <file alias="256x256/plus_folder.png">icons/256x256/plus_folder.png</file>
10 </qresource> 24 </qresource>
11</RCC> 25</RCC>
diff --git a/dist/qt_themes/default/icons/16x16/lock.png b/dist/qt_themes/default/icons/16x16/lock.png
new file mode 100644
index 000000000..496b58078
--- /dev/null
+++ b/dist/qt_themes/default/icons/16x16/lock.png
Binary files differ
diff --git a/dist/qt_themes/default/icons/256x256/plus_folder.png b/dist/qt_themes/default/icons/256x256/plus_folder.png
new file mode 100644
index 000000000..ae4afccc7
--- /dev/null
+++ b/dist/qt_themes/default/icons/256x256/plus_folder.png
Binary files differ
diff --git a/dist/qt_themes/default/icons/48x48/bad_folder.png b/dist/qt_themes/default/icons/48x48/bad_folder.png
new file mode 100644
index 000000000..2527c1318
--- /dev/null
+++ b/dist/qt_themes/default/icons/48x48/bad_folder.png
Binary files differ
diff --git a/dist/qt_themes/default/icons/48x48/chip.png b/dist/qt_themes/default/icons/48x48/chip.png
new file mode 100644
index 000000000..3efdf301e
--- /dev/null
+++ b/dist/qt_themes/default/icons/48x48/chip.png
Binary files differ
diff --git a/dist/qt_themes/default/icons/48x48/folder.png b/dist/qt_themes/default/icons/48x48/folder.png
new file mode 100644
index 000000000..2e67d8b38
--- /dev/null
+++ b/dist/qt_themes/default/icons/48x48/folder.png
Binary files differ
diff --git a/dist/qt_themes/default/icons/48x48/plus.png b/dist/qt_themes/default/icons/48x48/plus.png
new file mode 100644
index 000000000..dbc74687b
--- /dev/null
+++ b/dist/qt_themes/default/icons/48x48/plus.png
Binary files differ
diff --git a/dist/qt_themes/default/icons/48x48/sd_card.png b/dist/qt_themes/default/icons/48x48/sd_card.png
new file mode 100644
index 000000000..edacaeeb5
--- /dev/null
+++ b/dist/qt_themes/default/icons/48x48/sd_card.png
Binary files differ
diff --git a/dist/qt_themes/default/icons/index.theme b/dist/qt_themes/default/icons/index.theme
index ac67cb236..1edbe6408 100644
--- a/dist/qt_themes/default/icons/index.theme
+++ b/dist/qt_themes/default/icons/index.theme
@@ -1,10 +1,13 @@
1[Icon Theme] 1[Icon Theme]
2Name=default 2Name=default
3Comment=default theme 3Comment=default theme
4Directories=16x16,256x256 4Directories=16x16,48x48,256x256
5 5
6[16x16] 6[16x16]
7Size=16 7Size=16
8
9[48x48]
10Size=48
8 11
9[256x256] 12[256x256]
10Size=256 \ No newline at end of file 13Size=256 \ No newline at end of file
diff --git a/dist/qt_themes/qdarkstyle/icons/16x16/lock.png b/dist/qt_themes/qdarkstyle/icons/16x16/lock.png
new file mode 100644
index 000000000..c750a39e8
--- /dev/null
+++ b/dist/qt_themes/qdarkstyle/icons/16x16/lock.png
Binary files differ
diff --git a/dist/qt_themes/qdarkstyle/icons/256x256/plus_folder.png b/dist/qt_themes/qdarkstyle/icons/256x256/plus_folder.png
new file mode 100644
index 000000000..303f9a321
--- /dev/null
+++ b/dist/qt_themes/qdarkstyle/icons/256x256/plus_folder.png
Binary files differ
diff --git a/dist/qt_themes/qdarkstyle/icons/48x48/bad_folder.png b/dist/qt_themes/qdarkstyle/icons/48x48/bad_folder.png
new file mode 100644
index 000000000..4a9709623
--- /dev/null
+++ b/dist/qt_themes/qdarkstyle/icons/48x48/bad_folder.png
Binary files differ
diff --git a/dist/qt_themes/qdarkstyle/icons/48x48/chip.png b/dist/qt_themes/qdarkstyle/icons/48x48/chip.png
new file mode 100644
index 000000000..973fabd05
--- /dev/null
+++ b/dist/qt_themes/qdarkstyle/icons/48x48/chip.png
Binary files differ
diff --git a/dist/qt_themes/qdarkstyle/icons/48x48/folder.png b/dist/qt_themes/qdarkstyle/icons/48x48/folder.png
new file mode 100644
index 000000000..0f1e987d6
--- /dev/null
+++ b/dist/qt_themes/qdarkstyle/icons/48x48/folder.png
Binary files differ
diff --git a/dist/qt_themes/qdarkstyle/icons/48x48/plus.png b/dist/qt_themes/qdarkstyle/icons/48x48/plus.png
new file mode 100644
index 000000000..16cc8b4f4
--- /dev/null
+++ b/dist/qt_themes/qdarkstyle/icons/48x48/plus.png
Binary files differ
diff --git a/dist/qt_themes/qdarkstyle/icons/48x48/sd_card.png b/dist/qt_themes/qdarkstyle/icons/48x48/sd_card.png
new file mode 100644
index 000000000..0291c6542
--- /dev/null
+++ b/dist/qt_themes/qdarkstyle/icons/48x48/sd_card.png
Binary files differ
diff --git a/dist/qt_themes/qdarkstyle/icons/index.theme b/dist/qt_themes/qdarkstyle/icons/index.theme
index 558ece40b..d1e12f3ef 100644
--- a/dist/qt_themes/qdarkstyle/icons/index.theme
+++ b/dist/qt_themes/qdarkstyle/icons/index.theme
@@ -2,10 +2,13 @@
2Name=qdarkstyle 2Name=qdarkstyle
3Comment=dark theme 3Comment=dark theme
4Inherits=default 4Inherits=default
5Directories=16x16,256x256 5Directories=16x16,48x48,256x256
6 6
7[16x16] 7[16x16]
8Size=16 8Size=16
9 9
10[48x48]
11Size=48
12
10[256x256] 13[256x256]
11Size=256 \ No newline at end of file 14Size=256 \ No newline at end of file
diff --git a/dist/qt_themes/qdarkstyle/style.qrc b/dist/qt_themes/qdarkstyle/style.qrc
index efbd0b9dc..c2c14c28a 100644
--- a/dist/qt_themes/qdarkstyle/style.qrc
+++ b/dist/qt_themes/qdarkstyle/style.qrc
@@ -1,6 +1,13 @@
1<RCC> 1<RCC>
2 <qresource prefix="icons/qdarkstyle"> 2 <qresource prefix="icons/qdarkstyle">
3 <file alias="index.theme">icons/index.theme</file> 3 <file alias="index.theme">icons/index.theme</file>
4 <file alias="16x16/lock.png">icons/16x16/lock.png</file>
5 <file alias="48x48/bad_folder.png">icons/48x48/bad_folder.png</file>
6 <file alias="48x48/chip.png">icons/48x48/chip.png</file>
7 <file alias="48x48/folder.png">icons/48x48/folder.png</file>
8 <file alias="48x48/plus.png">icons/48x48/plus.png</file>
9 <file alias="48x48/sd_card.png">icons/48x48/sd_card.png</file>
10 <file alias="256x256/plus_folder.png">icons/256x256/plus_folder.png</file>
4 </qresource> 11 </qresource>
5 <qresource prefix="qss_icons"> 12 <qresource prefix="qss_icons">
6 <file>rc/up_arrow_disabled.png</file> 13 <file>rc/up_arrow_disabled.png</file>
diff --git a/externals/fmt b/externals/fmt
Subproject 9e554999ce02cf86fcdfe74fe740c4fe3f5a56d Subproject 7512a55aa3ae309587ca89668ef9ec4074a51a1
diff --git a/license.txt b/license.txt
index d511905c1..2b858f9a7 100644
--- a/license.txt
+++ b/license.txt
@@ -337,3 +337,19 @@ proprietary programs. If your program is a subroutine library, you may
337consider it more useful to permit linking proprietary applications with the 337consider it more useful to permit linking proprietary applications with the
338library. If this is what you want to do, use the GNU Lesser General 338library. If this is what you want to do, use the GNU Lesser General
339Public License instead of this License. 339Public License instead of this License.
340
341
342The icons used in this project have the following licenses:
343
344Icon Name | License | Origin/Author
345--- | --- | ---
346checked.png | Free for non-commercial use
347failed.png | Free for non-commercial use
348lock.png | CC BY-ND 3.0 | https://icons8.com
349plus_folder.png | CC BY-ND 3.0 | https://icons8.com
350bad_folder.png | CC BY-ND 3.0 | https://icons8.com
351chip.png | CC BY-ND 3.0 | https://icons8.com
352folder.png | CC BY-ND 3.0 | https://icons8.com
353plus.png (Default, Dark) | CC0 1.0 | Designed by BreadFish64 from the Citra team
354plus.png (Colorful, Colorful Dark) | CC BY-ND 3.0 | https://icons8.com
355sd_card.png | CC BY-ND 3.0 | https://icons8.com
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 9a0939883..da50a0bbc 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -73,13 +73,15 @@ private:
73 EffectInStatus info{}; 73 EffectInStatus info{};
74}; 74};
75AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, 75AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
76 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event) 76 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event,
77 std::size_t instance_number)
77 : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), 78 : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
78 effects(params.effect_count) { 79 effects(params.effect_count) {
79 80
80 audio_out = std::make_unique<AudioCore::AudioOut>(); 81 audio_out = std::make_unique<AudioCore::AudioOut>();
81 stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, 82 stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
82 "AudioRenderer", [=]() { buffer_event->Signal(); }); 83 fmt::format("AudioRenderer-Instance{}", instance_number),
84 [=]() { buffer_event->Signal(); });
83 audio_out->StartStream(stream); 85 audio_out->StartStream(stream);
84 86
85 QueueMixedBuffer(0); 87 QueueMixedBuffer(0);
@@ -217,13 +219,15 @@ std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_co
217 if (offset == samples.size()) { 219 if (offset == samples.size()) {
218 offset = 0; 220 offset = 0;
219 221
220 if (!wave_buffer.is_looping) { 222 if (!wave_buffer.is_looping && wave_buffer.buffer_sz) {
221 SetWaveIndex(wave_index + 1); 223 SetWaveIndex(wave_index + 1);
222 } 224 }
223 225
224 out_status.wave_buffer_consumed++; 226 if (wave_buffer.buffer_sz) {
227 out_status.wave_buffer_consumed++;
228 }
225 229
226 if (wave_buffer.end_of_stream) { 230 if (wave_buffer.end_of_stream || wave_buffer.buffer_sz == 0) {
227 info.play_state = PlayState::Paused; 231 info.play_state = PlayState::Paused;
228 } 232 }
229 } 233 }
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index b2e5d336c..45afbe759 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -215,7 +215,8 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
215class AudioRenderer { 215class AudioRenderer {
216public: 216public:
217 AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params, 217 AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
218 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event); 218 Kernel::SharedPtr<Kernel::WritableEvent> buffer_event,
219 std::size_t instance_number);
219 ~AudioRenderer(); 220 ~AudioRenderer();
220 221
221 std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); 222 std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params);
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 198b3fe07..01abdb3bb 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp
44 "${VIDEO_CORE}/shader/decode/half_set.cpp" 44 "${VIDEO_CORE}/shader/decode/half_set.cpp"
45 "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" 45 "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
46 "${VIDEO_CORE}/shader/decode/hfma2.cpp" 46 "${VIDEO_CORE}/shader/decode/hfma2.cpp"
47 "${VIDEO_CORE}/shader/decode/image.cpp"
47 "${VIDEO_CORE}/shader/decode/integer_set.cpp" 48 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" 49 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
49 "${VIDEO_CORE}/shader/decode/memory.cpp" 50 "${VIDEO_CORE}/shader/decode/memory.cpp"
@@ -54,7 +55,10 @@ add_custom_command(OUTPUT scm_rev.cpp
54 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" 55 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
55 "${VIDEO_CORE}/shader/decode/shift.cpp" 56 "${VIDEO_CORE}/shader/decode/shift.cpp"
56 "${VIDEO_CORE}/shader/decode/video.cpp" 57 "${VIDEO_CORE}/shader/decode/video.cpp"
58 "${VIDEO_CORE}/shader/decode/warp.cpp"
57 "${VIDEO_CORE}/shader/decode/xmad.cpp" 59 "${VIDEO_CORE}/shader/decode/xmad.cpp"
60 "${VIDEO_CORE}/shader/control_flow.cpp"
61 "${VIDEO_CORE}/shader/control_flow.h"
58 "${VIDEO_CORE}/shader/decode.cpp" 62 "${VIDEO_CORE}/shader/decode.cpp"
59 "${VIDEO_CORE}/shader/node.h" 63 "${VIDEO_CORE}/shader/node.h"
60 "${VIDEO_CORE}/shader/node_helper.cpp" 64 "${VIDEO_CORE}/shader/node_helper.cpp"
@@ -74,6 +78,7 @@ add_library(common STATIC
74 assert.h 78 assert.h
75 detached_tasks.cpp 79 detached_tasks.cpp
76 detached_tasks.h 80 detached_tasks.h
81 binary_find.h
77 bit_field.h 82 bit_field.h
78 bit_util.h 83 bit_util.h
79 cityhash.cpp 84 cityhash.cpp
diff --git a/src/common/alignment.h b/src/common/alignment.h
index d94a2291f..88d5d3a65 100644
--- a/src/common/alignment.h
+++ b/src/common/alignment.h
@@ -3,6 +3,7 @@
3#pragma once 3#pragma once
4 4
5#include <cstddef> 5#include <cstddef>
6#include <memory>
6#include <type_traits> 7#include <type_traits>
7 8
8namespace Common { 9namespace Common {
@@ -20,6 +21,12 @@ constexpr T AlignDown(T value, std::size_t size) {
20} 21}
21 22
22template <typename T> 23template <typename T>
24constexpr T AlignBits(T value, std::size_t align) {
25 static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
26 return static_cast<T>((value + ((1ULL << align) - 1)) >> align << align);
27}
28
29template <typename T>
23constexpr bool Is4KBAligned(T value) { 30constexpr bool Is4KBAligned(T value) {
24 static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); 31 static_assert(std::is_unsigned_v<T>, "T must be an unsigned value.");
25 return (value & 0xFFF) == 0; 32 return (value & 0xFFF) == 0;
@@ -31,4 +38,63 @@ constexpr bool IsWordAligned(T value) {
31 return (value & 0b11) == 0; 38 return (value & 0b11) == 0;
32} 39}
33 40
41template <typename T, std::size_t Align = 16>
42class AlignmentAllocator {
43public:
44 using value_type = T;
45 using size_type = std::size_t;
46 using difference_type = std::ptrdiff_t;
47
48 using pointer = T*;
49 using const_pointer = const T*;
50
51 using reference = T&;
52 using const_reference = const T&;
53
54public:
55 pointer address(reference r) noexcept {
56 return std::addressof(r);
57 }
58
59 const_pointer address(const_reference r) const noexcept {
60 return std::addressof(r);
61 }
62
63 pointer allocate(size_type n) {
64 return static_cast<pointer>(::operator new (n, std::align_val_t{Align}));
65 }
66
67 void deallocate(pointer p, size_type) {
68 ::operator delete (p, std::align_val_t{Align});
69 }
70
71 void construct(pointer p, const value_type& wert) {
72 new (p) value_type(wert);
73 }
74
75 void destroy(pointer p) {
76 p->~value_type();
77 }
78
79 size_type max_size() const noexcept {
80 return size_type(-1) / sizeof(value_type);
81 }
82
83 template <typename T2>
84 struct rebind {
85 using other = AlignmentAllocator<T2, Align>;
86 };
87
88 bool operator!=(const AlignmentAllocator<T, Align>& other) const noexcept {
89 return !(*this == other);
90 }
91
92 // Returns true if and only if storage allocated from *this
93 // can be deallocated from other, and vice versa.
94 // Always returns true for stateless allocators.
95 bool operator==(const AlignmentAllocator<T, Align>& other) const noexcept {
96 return true;
97 }
98};
99
34} // namespace Common 100} // namespace Common
diff --git a/src/common/binary_find.h b/src/common/binary_find.h
new file mode 100644
index 000000000..5cc523bf9
--- /dev/null
+++ b/src/common/binary_find.h
@@ -0,0 +1,21 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8
9namespace Common {
10
11template <class ForwardIt, class T, class Compare = std::less<>>
12ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) {
13 // Note: BOTH type T and the type after ForwardIt is dereferenced
14 // must be implicitly convertible to BOTH Type1 and Type2, used in Compare.
15 // This is stricter than lower_bound requirement (see above)
16
17 first = std::lower_bound(first, last, value, comp);
18 return first != last && !comp(value, *first) ? first : last;
19}
20
21} // namespace Common
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index d032df413..6f7d5a947 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -97,4 +97,48 @@ inline u32 CountTrailingZeroes64(u64 value) {
97} 97}
98#endif 98#endif
99 99
100#ifdef _MSC_VER
101
102inline u32 MostSignificantBit32(const u32 value) {
103 unsigned long result;
104 _BitScanReverse(&result, value);
105 return static_cast<u32>(result);
106}
107
108inline u32 MostSignificantBit64(const u64 value) {
109 unsigned long result;
110 _BitScanReverse64(&result, value);
111 return static_cast<u32>(result);
112}
113
114#else
115
116inline u32 MostSignificantBit32(const u32 value) {
117 return 31U - static_cast<u32>(__builtin_clz(value));
118}
119
120inline u32 MostSignificantBit64(const u64 value) {
121 return 63U - static_cast<u32>(__builtin_clzll(value));
122}
123
124#endif
125
126inline u32 Log2Floor32(const u32 value) {
127 return MostSignificantBit32(value);
128}
129
130inline u32 Log2Ceil32(const u32 value) {
131 const u32 log2_f = Log2Floor32(value);
132 return log2_f + ((value ^ (1U << log2_f)) != 0U);
133}
134
135inline u32 Log2Floor64(const u64 value) {
136 return MostSignificantBit64(value);
137}
138
139inline u32 Log2Ceil64(const u64 value) {
140 const u64 log2_f = static_cast<u64>(Log2Floor64(value));
141 return static_cast<u32>(log2_f + ((value ^ (1ULL << log2_f)) != 0ULL));
142}
143
100} // namespace Common 144} // namespace Common
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index 8b0d34da6..04ecac959 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm>
7#include <string> 8#include <string>
8 9
9#if !defined(ARCHITECTURE_x86_64) 10#if !defined(ARCHITECTURE_x86_64)
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index cb77b99ee..877a9e353 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -70,6 +70,8 @@ add_library(core STATIC
70 file_sys/sdmc_factory.h 70 file_sys/sdmc_factory.h
71 file_sys/submission_package.cpp 71 file_sys/submission_package.cpp
72 file_sys/submission_package.h 72 file_sys/submission_package.h
73 file_sys/system_archive/mii_model.cpp
74 file_sys/system_archive/mii_model.h
73 file_sys/system_archive/ng_word.cpp 75 file_sys/system_archive/ng_word.cpp
74 file_sys/system_archive/ng_word.h 76 file_sys/system_archive/ng_word.h
75 file_sys/system_archive/system_archive.cpp 77 file_sys/system_archive/system_archive.cpp
@@ -111,6 +113,8 @@ add_library(core STATIC
111 frontend/scope_acquire_window_context.h 113 frontend/scope_acquire_window_context.h
112 gdbstub/gdbstub.cpp 114 gdbstub/gdbstub.cpp
113 gdbstub/gdbstub.h 115 gdbstub/gdbstub.h
116 hardware_interrupt_manager.cpp
117 hardware_interrupt_manager.h
114 hle/ipc.h 118 hle/ipc.h
115 hle/ipc_helpers.h 119 hle/ipc_helpers.h
116 hle/kernel/address_arbiter.cpp 120 hle/kernel/address_arbiter.cpp
@@ -175,6 +179,7 @@ add_library(core STATIC
175 hle/service/acc/acc_u0.h 179 hle/service/acc/acc_u0.h
176 hle/service/acc/acc_u1.cpp 180 hle/service/acc/acc_u1.cpp
177 hle/service/acc/acc_u1.h 181 hle/service/acc/acc_u1.h
182 hle/service/acc/errors.h
178 hle/service/acc/profile_manager.cpp 183 hle/service/acc/profile_manager.cpp
179 hle/service/acc/profile_manager.h 184 hle/service/acc/profile_manager.h
180 hle/service/am/am.cpp 185 hle/service/am/am.cpp
@@ -207,6 +212,8 @@ add_library(core STATIC
207 hle/service/aoc/aoc_u.h 212 hle/service/aoc/aoc_u.h
208 hle/service/apm/apm.cpp 213 hle/service/apm/apm.cpp
209 hle/service/apm/apm.h 214 hle/service/apm/apm.h
215 hle/service/apm/controller.cpp
216 hle/service/apm/controller.h
210 hle/service/apm/interface.cpp 217 hle/service/apm/interface.cpp
211 hle/service/apm/interface.h 218 hle/service/apm/interface.h
212 hle/service/audio/audctl.cpp 219 hle/service/audio/audctl.cpp
@@ -270,6 +277,7 @@ add_library(core STATIC
270 hle/service/filesystem/fsp_srv.h 277 hle/service/filesystem/fsp_srv.h
271 hle/service/fgm/fgm.cpp 278 hle/service/fgm/fgm.cpp
272 hle/service/fgm/fgm.h 279 hle/service/fgm/fgm.h
280 hle/service/friend/errors.h
273 hle/service/friend/friend.cpp 281 hle/service/friend/friend.cpp
274 hle/service/friend/friend.h 282 hle/service/friend/friend.h
275 hle/service/friend/interface.cpp 283 hle/service/friend/interface.cpp
@@ -291,6 +299,7 @@ add_library(core STATIC
291 hle/service/hid/irs.h 299 hle/service/hid/irs.h
292 hle/service/hid/xcd.cpp 300 hle/service/hid/xcd.cpp
293 hle/service/hid/xcd.h 301 hle/service/hid/xcd.h
302 hle/service/hid/errors.h
294 hle/service/hid/controllers/controller_base.cpp 303 hle/service/hid/controllers/controller_base.cpp
295 hle/service/hid/controllers/controller_base.h 304 hle/service/hid/controllers/controller_base.h
296 hle/service/hid/controllers/debug_pad.cpp 305 hle/service/hid/controllers/debug_pad.cpp
@@ -367,6 +376,7 @@ add_library(core STATIC
367 hle/service/nvdrv/devices/nvmap.h 376 hle/service/nvdrv/devices/nvmap.h
368 hle/service/nvdrv/interface.cpp 377 hle/service/nvdrv/interface.cpp
369 hle/service/nvdrv/interface.h 378 hle/service/nvdrv/interface.h
379 hle/service/nvdrv/nvdata.h
370 hle/service/nvdrv/nvdrv.cpp 380 hle/service/nvdrv/nvdrv.cpp
371 hle/service/nvdrv/nvdrv.h 381 hle/service/nvdrv/nvdrv.h
372 hle/service/nvdrv/nvmemp.cpp 382 hle/service/nvdrv/nvmemp.cpp
@@ -429,6 +439,8 @@ add_library(core STATIC
429 hle/service/time/interface.h 439 hle/service/time/interface.h
430 hle/service/time/time.cpp 440 hle/service/time/time.cpp
431 hle/service/time/time.h 441 hle/service/time/time.h
442 hle/service/time/time_sharedmemory.cpp
443 hle/service/time/time_sharedmemory.h
432 hle/service/usb/usb.cpp 444 hle/service/usb/usb.cpp
433 hle/service/usb/usb.h 445 hle/service/usb/usb.h
434 hle/service/vi/display/vi_display.cpp 446 hle/service/vi/display/vi_display.cpp
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index c6691a8e1..45e94e625 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -44,13 +44,6 @@ public:
44 /// Step CPU by one instruction 44 /// Step CPU by one instruction
45 virtual void Step() = 0; 45 virtual void Step() = 0;
46 46
47 /// Maps a backing memory region for the CPU
48 virtual void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
49 Kernel::VMAPermission perms) = 0;
50
51 /// Unmaps a region of memory that was previously mapped using MapBackingMemory
52 virtual void UnmapMemory(VAddr address, std::size_t size) = 0;
53
54 /// Clear all instruction cache 47 /// Clear all instruction cache
55 virtual void ClearInstructionCache() = 0; 48 virtual void ClearInstructionCache() = 0;
56 49
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 44307fa19..f1506b372 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -177,15 +177,6 @@ ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor,
177 177
178ARM_Dynarmic::~ARM_Dynarmic() = default; 178ARM_Dynarmic::~ARM_Dynarmic() = default;
179 179
180void ARM_Dynarmic::MapBackingMemory(u64 address, std::size_t size, u8* memory,
181 Kernel::VMAPermission perms) {
182 inner_unicorn.MapBackingMemory(address, size, memory, perms);
183}
184
185void ARM_Dynarmic::UnmapMemory(u64 address, std::size_t size) {
186 inner_unicorn.UnmapMemory(address, size);
187}
188
189void ARM_Dynarmic::SetPC(u64 pc) { 180void ARM_Dynarmic::SetPC(u64 pc) {
190 jit->SetPC(pc); 181 jit->SetPC(pc);
191} 182}
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index b701e97a3..504d46c68 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -23,9 +23,6 @@ public:
23 ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); 23 ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
24 ~ARM_Dynarmic() override; 24 ~ARM_Dynarmic() override;
25 25
26 void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
27 Kernel::VMAPermission perms) override;
28 void UnmapMemory(u64 address, std::size_t size) override;
29 void SetPC(u64 pc) override; 26 void SetPC(u64 pc) override;
30 u64 GetPC() const override; 27 u64 GetPC() const override;
31 u64 GetReg(int index) const override; 28 u64 GetReg(int index) const override;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 4e07fe8b5..97d5c2a8a 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_
50 50
51static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value, 51static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
52 void* user_data) { 52 void* user_data) {
53 auto* const system = static_cast<System*>(user_data);
54
53 ARM_Interface::ThreadContext ctx{}; 55 ARM_Interface::ThreadContext ctx{};
54 Core::CurrentArmInterface().SaveContext(ctx); 56 system->CurrentArmInterface().SaveContext(ctx);
55 ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, 57 ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
56 ctx.pc, ctx.cpu_registers[30]); 58 ctx.pc, ctx.cpu_registers[30]);
57 return {}; 59
60 return false;
58} 61}
59 62
60ARM_Unicorn::ARM_Unicorn(System& system) : system{system} { 63ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
@@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
65 68
66 uc_hook hook{}; 69 uc_hook hook{};
67 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1)); 70 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1));
68 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1)); 71 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1));
69 if (GDBStub::IsServerEnabled()) { 72 if (GDBStub::IsServerEnabled()) {
70 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1)); 73 CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1));
71 last_bkpt_hit = false; 74 last_bkpt_hit = false;
@@ -76,15 +79,6 @@ ARM_Unicorn::~ARM_Unicorn() {
76 CHECKED(uc_close(uc)); 79 CHECKED(uc_close(uc));
77} 80}
78 81
79void ARM_Unicorn::MapBackingMemory(VAddr address, std::size_t size, u8* memory,
80 Kernel::VMAPermission perms) {
81 CHECKED(uc_mem_map_ptr(uc, address, size, static_cast<u32>(perms), memory));
82}
83
84void ARM_Unicorn::UnmapMemory(VAddr address, std::size_t size) {
85 CHECKED(uc_mem_unmap(uc, address, size));
86}
87
88void ARM_Unicorn::SetPC(u64 pc) { 82void ARM_Unicorn::SetPC(u64 pc) {
89 CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc)); 83 CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc));
90} 84}
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 34e974b4d..fe2ffd70c 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -18,9 +18,6 @@ public:
18 explicit ARM_Unicorn(System& system); 18 explicit ARM_Unicorn(System& system);
19 ~ARM_Unicorn() override; 19 ~ARM_Unicorn() override;
20 20
21 void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
22 Kernel::VMAPermission perms) override;
23 void UnmapMemory(VAddr address, std::size_t size) override;
24 void SetPC(u64 pc) override; 21 void SetPC(u64 pc) override;
25 u64 GetPC() const override; 22 u64 GetPC() const override;
26 u64 GetReg(int index) const override; 23 u64 GetReg(int index) const override;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 262411db8..3d0978cbf 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -19,12 +19,14 @@
19#include "core/file_sys/vfs_concat.h" 19#include "core/file_sys/vfs_concat.h"
20#include "core/file_sys/vfs_real.h" 20#include "core/file_sys/vfs_real.h"
21#include "core/gdbstub/gdbstub.h" 21#include "core/gdbstub/gdbstub.h"
22#include "core/hardware_interrupt_manager.h"
22#include "core/hle/kernel/client_port.h" 23#include "core/hle/kernel/client_port.h"
23#include "core/hle/kernel/kernel.h" 24#include "core/hle/kernel/kernel.h"
24#include "core/hle/kernel/process.h" 25#include "core/hle/kernel/process.h"
25#include "core/hle/kernel/scheduler.h" 26#include "core/hle/kernel/scheduler.h"
26#include "core/hle/kernel/thread.h" 27#include "core/hle/kernel/thread.h"
27#include "core/hle/service/am/applets/applets.h" 28#include "core/hle/service/am/applets/applets.h"
29#include "core/hle/service/apm/controller.h"
28#include "core/hle/service/glue/manager.h" 30#include "core/hle/service/glue/manager.h"
29#include "core/hle/service/service.h" 31#include "core/hle/service/service.h"
30#include "core/hle/service/sm/sm.h" 32#include "core/hle/service/sm/sm.h"
@@ -102,7 +104,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
102 return vfs->OpenFile(path, FileSys::Mode::Read); 104 return vfs->OpenFile(path, FileSys::Mode::Read);
103} 105}
104struct System::Impl { 106struct System::Impl {
105 explicit Impl(System& system) : kernel{system}, cpu_core_manager{system}, reporter{system} {} 107 explicit Impl(System& system)
108 : kernel{system}, cpu_core_manager{system}, applet_manager{system}, reporter{system} {}
106 109
107 Cpu& CurrentCpuCore() { 110 Cpu& CurrentCpuCore() {
108 return cpu_core_manager.GetCurrentCore(); 111 return cpu_core_manager.GetCurrentCore();
@@ -143,14 +146,14 @@ struct System::Impl {
143 telemetry_session = std::make_unique<Core::TelemetrySession>(); 146 telemetry_session = std::make_unique<Core::TelemetrySession>();
144 service_manager = std::make_shared<Service::SM::ServiceManager>(); 147 service_manager = std::make_shared<Service::SM::ServiceManager>();
145 148
146 Service::Init(service_manager, system, *virtual_filesystem); 149 Service::Init(service_manager, system);
147 GDBStub::Init(); 150 GDBStub::Init();
148 151
149 renderer = VideoCore::CreateRenderer(emu_window, system); 152 renderer = VideoCore::CreateRenderer(emu_window, system);
150 if (!renderer->Init()) { 153 if (!renderer->Init()) {
151 return ResultStatus::ErrorVideoCore; 154 return ResultStatus::ErrorVideoCore;
152 } 155 }
153 156 interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
154 gpu_core = VideoCore::CreateGPU(system); 157 gpu_core = VideoCore::CreateGPU(system);
155 158
156 is_powered_on = true; 159 is_powered_on = true;
@@ -297,6 +300,7 @@ struct System::Impl {
297 std::unique_ptr<VideoCore::RendererBase> renderer; 300 std::unique_ptr<VideoCore::RendererBase> renderer;
298 std::unique_ptr<Tegra::GPU> gpu_core; 301 std::unique_ptr<Tegra::GPU> gpu_core;
299 std::shared_ptr<Tegra::DebugContext> debug_context; 302 std::shared_ptr<Tegra::DebugContext> debug_context;
303 std::unique_ptr<Core::Hardware::InterruptManager> interrupt_manager;
300 CpuCoreManager cpu_core_manager; 304 CpuCoreManager cpu_core_manager;
301 bool is_powered_on = false; 305 bool is_powered_on = false;
302 306
@@ -306,6 +310,9 @@ struct System::Impl {
306 /// Frontend applets 310 /// Frontend applets
307 Service::AM::Applets::AppletManager applet_manager; 311 Service::AM::Applets::AppletManager applet_manager;
308 312
313 /// APM (Performance) services
314 Service::APM::Controller apm_controller{core_timing};
315
309 /// Glue services 316 /// Glue services
310 Service::Glue::ARPManager arp_manager; 317 Service::Glue::ARPManager arp_manager;
311 318
@@ -440,6 +447,14 @@ const Tegra::GPU& System::GPU() const {
440 return *impl->gpu_core; 447 return *impl->gpu_core;
441} 448}
442 449
450Core::Hardware::InterruptManager& System::InterruptManager() {
451 return *impl->interrupt_manager;
452}
453
454const Core::Hardware::InterruptManager& System::InterruptManager() const {
455 return *impl->interrupt_manager;
456}
457
443VideoCore::RendererBase& System::Renderer() { 458VideoCore::RendererBase& System::Renderer() {
444 return *impl->renderer; 459 return *impl->renderer;
445} 460}
@@ -568,6 +583,14 @@ const Service::Glue::ARPManager& System::GetARPManager() const {
568 return impl->arp_manager; 583 return impl->arp_manager;
569} 584}
570 585
586Service::APM::Controller& System::GetAPMController() {
587 return impl->apm_controller;
588}
589
590const Service::APM::Controller& System::GetAPMController() const {
591 return impl->apm_controller;
592}
593
571System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) { 594System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) {
572 return impl->Init(*this, emu_window); 595 return impl->Init(*this, emu_window);
573} 596}
diff --git a/src/core/core.h b/src/core/core.h
index 70adb7af9..0138d93b0 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -43,6 +43,10 @@ struct AppletFrontendSet;
43class AppletManager; 43class AppletManager;
44} // namespace AM::Applets 44} // namespace AM::Applets
45 45
46namespace APM {
47class Controller;
48}
49
46namespace Glue { 50namespace Glue {
47class ARPManager; 51class ARPManager;
48} 52}
@@ -66,6 +70,10 @@ namespace Core::Timing {
66class CoreTiming; 70class CoreTiming;
67} 71}
68 72
73namespace Core::Hardware {
74class InterruptManager;
75}
76
69namespace Core { 77namespace Core {
70 78
71class ARM_Interface; 79class ARM_Interface;
@@ -230,6 +238,12 @@ public:
230 /// Provides a constant reference to the core timing instance. 238 /// Provides a constant reference to the core timing instance.
231 const Timing::CoreTiming& CoreTiming() const; 239 const Timing::CoreTiming& CoreTiming() const;
232 240
241 /// Provides a reference to the interrupt manager instance.
242 Core::Hardware::InterruptManager& InterruptManager();
243
244 /// Provides a constant reference to the interrupt manager instance.
245 const Core::Hardware::InterruptManager& InterruptManager() const;
246
233 /// Provides a reference to the kernel instance. 247 /// Provides a reference to the kernel instance.
234 Kernel::KernelCore& Kernel(); 248 Kernel::KernelCore& Kernel();
235 249
@@ -296,6 +310,10 @@ public:
296 310
297 const Service::Glue::ARPManager& GetARPManager() const; 311 const Service::Glue::ARPManager& GetARPManager() const;
298 312
313 Service::APM::Controller& GetAPMController();
314
315 const Service::APM::Controller& GetAPMController() const;
316
299private: 317private:
300 System(); 318 System();
301 319
@@ -319,10 +337,6 @@ private:
319 static System s_instance; 337 static System s_instance;
320}; 338};
321 339
322inline ARM_Interface& CurrentArmInterface() {
323 return System::GetInstance().CurrentArmInterface();
324}
325
326inline Kernel::Process* CurrentProcess() { 340inline Kernel::Process* CurrentProcess() {
327 return System::GetInstance().CurrentProcess(); 341 return System::GetInstance().CurrentProcess();
328} 342}
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index ba63c3e61..21c410e34 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -53,16 +53,12 @@ bool CpuBarrier::Rendezvous() {
53Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, 53Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
54 std::size_t core_index) 54 std::size_t core_index)
55 : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} { 55 : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
56 if (Settings::values.use_cpu_jit) {
57#ifdef ARCHITECTURE_x86_64 56#ifdef ARCHITECTURE_x86_64
58 arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index); 57 arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
59#else 58#else
60 arm_interface = std::make_unique<ARM_Unicorn>(system); 59 arm_interface = std::make_unique<ARM_Unicorn>(system);
61 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); 60 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
62#endif 61#endif
63 } else {
64 arm_interface = std::make_unique<ARM_Unicorn>(system);
65 }
66 62
67 scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface); 63 scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
68} 64}
@@ -70,15 +66,12 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba
70Cpu::~Cpu() = default; 66Cpu::~Cpu() = default;
71 67
72std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) { 68std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) {
73 if (Settings::values.use_cpu_jit) {
74#ifdef ARCHITECTURE_x86_64 69#ifdef ARCHITECTURE_x86_64
75 return std::make_unique<DynarmicExclusiveMonitor>(num_cores); 70 return std::make_unique<DynarmicExclusiveMonitor>(num_cores);
76#else 71#else
77 return nullptr; // TODO(merry): Passthrough exclusive monitor 72 // TODO(merry): Passthrough exclusive monitor
73 return nullptr;
78#endif 74#endif
79 } else {
80 return nullptr; // TODO(merry): Passthrough exclusive monitor
81 }
82} 75}
83 76
84void Cpu::RunLoop(bool tight_loop) { 77void Cpu::RunLoop(bool tight_loop) {
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index 6dd633363..46aceec3d 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -37,6 +37,7 @@
37namespace Core::Crypto { 37namespace Core::Crypto {
38 38
39constexpr u64 CURRENT_CRYPTO_REVISION = 0x5; 39constexpr u64 CURRENT_CRYPTO_REVISION = 0x5;
40constexpr u64 FULL_TICKET_SIZE = 0x400;
40 41
41using namespace Common; 42using namespace Common;
42 43
@@ -55,6 +56,99 @@ const std::map<std::pair<S128KeyType, u64>, std::string> KEYS_VARIABLE_LENGTH{
55 {{S128KeyType::KeyblobMAC, 0}, "keyblob_mac_key_"}, 56 {{S128KeyType::KeyblobMAC, 0}, "keyblob_mac_key_"},
56}; 57};
57 58
59namespace {
60template <std::size_t Size>
61bool IsAllZeroArray(const std::array<u8, Size>& array) {
62 return std::all_of(array.begin(), array.end(), [](const auto& elem) { return elem == 0; });
63}
64} // namespace
65
66u64 GetSignatureTypeDataSize(SignatureType type) {
67 switch (type) {
68 case SignatureType::RSA_4096_SHA1:
69 case SignatureType::RSA_4096_SHA256:
70 return 0x200;
71 case SignatureType::RSA_2048_SHA1:
72 case SignatureType::RSA_2048_SHA256:
73 return 0x100;
74 case SignatureType::ECDSA_SHA1:
75 case SignatureType::ECDSA_SHA256:
76 return 0x3C;
77 }
78 UNREACHABLE();
79}
80
81u64 GetSignatureTypePaddingSize(SignatureType type) {
82 switch (type) {
83 case SignatureType::RSA_4096_SHA1:
84 case SignatureType::RSA_4096_SHA256:
85 case SignatureType::RSA_2048_SHA1:
86 case SignatureType::RSA_2048_SHA256:
87 return 0x3C;
88 case SignatureType::ECDSA_SHA1:
89 case SignatureType::ECDSA_SHA256:
90 return 0x40;
91 }
92 UNREACHABLE();
93}
94
95SignatureType Ticket::GetSignatureType() const {
96 if (auto ticket = std::get_if<RSA4096Ticket>(&data)) {
97 return ticket->sig_type;
98 }
99 if (auto ticket = std::get_if<RSA2048Ticket>(&data)) {
100 return ticket->sig_type;
101 }
102 if (auto ticket = std::get_if<ECDSATicket>(&data)) {
103 return ticket->sig_type;
104 }
105
106 UNREACHABLE();
107}
108
109TicketData& Ticket::GetData() {
110 if (auto ticket = std::get_if<RSA4096Ticket>(&data)) {
111 return ticket->data;
112 }
113 if (auto ticket = std::get_if<RSA2048Ticket>(&data)) {
114 return ticket->data;
115 }
116 if (auto ticket = std::get_if<ECDSATicket>(&data)) {
117 return ticket->data;
118 }
119
120 UNREACHABLE();
121}
122
123const TicketData& Ticket::GetData() const {
124 if (auto ticket = std::get_if<RSA4096Ticket>(&data)) {
125 return ticket->data;
126 }
127 if (auto ticket = std::get_if<RSA2048Ticket>(&data)) {
128 return ticket->data;
129 }
130 if (auto ticket = std::get_if<ECDSATicket>(&data)) {
131 return ticket->data;
132 }
133
134 UNREACHABLE();
135}
136
137u64 Ticket::GetSize() const {
138 const auto sig_type = GetSignatureType();
139
140 return sizeof(SignatureType) + GetSignatureTypeDataSize(sig_type) +
141 GetSignatureTypePaddingSize(sig_type) + sizeof(TicketData);
142}
143
144Ticket Ticket::SynthesizeCommon(Key128 title_key, const std::array<u8, 16>& rights_id) {
145 RSA2048Ticket out{};
146 out.sig_type = SignatureType::RSA_2048_SHA256;
147 out.data.rights_id = rights_id;
148 out.data.title_key_common = title_key;
149 return Ticket{out};
150}
151
58Key128 GenerateKeyEncryptionKey(Key128 source, Key128 master, Key128 kek_seed, Key128 key_seed) { 152Key128 GenerateKeyEncryptionKey(Key128 source, Key128 master, Key128 kek_seed, Key128 key_seed) {
59 Key128 out{}; 153 Key128 out{};
60 154
@@ -135,6 +229,27 @@ void KeyManager::DeriveGeneralPurposeKeys(std::size_t crypto_revision) {
135 } 229 }
136} 230}
137 231
232RSAKeyPair<2048> KeyManager::GetETicketRSAKey() const {
233 if (IsAllZeroArray(eticket_extended_kek) || !HasKey(S128KeyType::ETicketRSAKek))
234 return {};
235
236 const auto eticket_final = GetKey(S128KeyType::ETicketRSAKek);
237
238 std::vector<u8> extended_iv(eticket_extended_kek.begin(), eticket_extended_kek.begin() + 0x10);
239 std::array<u8, 0x230> extended_dec{};
240 AESCipher<Key128> rsa_1(eticket_final, Mode::CTR);
241 rsa_1.SetIV(extended_iv);
242 rsa_1.Transcode(eticket_extended_kek.data() + 0x10, eticket_extended_kek.size() - 0x10,
243 extended_dec.data(), Op::Decrypt);
244
245 RSAKeyPair<2048> rsa_key{};
246 std::memcpy(rsa_key.decryption_key.data(), extended_dec.data(), rsa_key.decryption_key.size());
247 std::memcpy(rsa_key.modulus.data(), extended_dec.data() + 0x100, rsa_key.modulus.size());
248 std::memcpy(rsa_key.exponent.data(), extended_dec.data() + 0x200, rsa_key.exponent.size());
249
250 return rsa_key;
251}
252
138Key128 DeriveKeyblobMACKey(const Key128& keyblob_key, const Key128& mac_source) { 253Key128 DeriveKeyblobMACKey(const Key128& keyblob_key, const Key128& mac_source) {
139 AESCipher<Key128> mac_cipher(keyblob_key, Mode::ECB); 254 AESCipher<Key128> mac_cipher(keyblob_key, Mode::ECB);
140 Key128 mac_key{}; 255 Key128 mac_key{};
@@ -237,7 +352,7 @@ Loader::ResultStatus DeriveSDKeys(std::array<Key256, 2>& sd_keys, KeyManager& ke
237 return Loader::ResultStatus::Success; 352 return Loader::ResultStatus::Success;
238} 353}
239 354
240std::vector<TicketRaw> GetTicketblob(const FileUtil::IOFile& ticket_save) { 355std::vector<Ticket> GetTicketblob(const FileUtil::IOFile& ticket_save) {
241 if (!ticket_save.IsOpen()) 356 if (!ticket_save.IsOpen())
242 return {}; 357 return {};
243 358
@@ -246,14 +361,14 @@ std::vector<TicketRaw> GetTicketblob(const FileUtil::IOFile& ticket_save) {
246 return {}; 361 return {};
247 } 362 }
248 363
249 std::vector<TicketRaw> out; 364 std::vector<Ticket> out;
250 for (std::size_t offset = 0; offset + 0x4 < buffer.size(); ++offset) { 365 for (std::size_t offset = 0; offset + 0x4 < buffer.size(); ++offset) {
251 if (buffer[offset] == 0x4 && buffer[offset + 1] == 0x0 && buffer[offset + 2] == 0x1 && 366 if (buffer[offset] == 0x4 && buffer[offset + 1] == 0x0 && buffer[offset + 2] == 0x1 &&
252 buffer[offset + 3] == 0x0) { 367 buffer[offset + 3] == 0x0) {
253 out.emplace_back(); 368 out.emplace_back();
254 auto& next = out.back(); 369 auto& next = out.back();
255 std::memcpy(&next, buffer.data() + offset, sizeof(TicketRaw)); 370 std::memcpy(&next, buffer.data() + offset, sizeof(Ticket));
256 offset += next.size(); 371 offset += FULL_TICKET_SIZE;
257 } 372 }
258 } 373 }
259 374
@@ -305,29 +420,23 @@ static std::optional<u64> FindTicketOffset(const std::array<u8, size>& data) {
305 return offset; 420 return offset;
306} 421}
307 422
308std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket, 423std::optional<std::pair<Key128, Key128>> ParseTicket(const Ticket& ticket,
309 const RSAKeyPair<2048>& key) { 424 const RSAKeyPair<2048>& key) {
310 u32 cert_authority; 425 const auto issuer = ticket.GetData().issuer;
311 std::memcpy(&cert_authority, ticket.data() + 0x140, sizeof(cert_authority)); 426 if (issuer == std::array<u8, 0x40>{})
312 if (cert_authority == 0)
313 return {}; 427 return {};
314 if (cert_authority != Common::MakeMagic('R', 'o', 'o', 't')) { 428 if (issuer[0] != 'R' || issuer[1] != 'o' || issuer[2] != 'o' || issuer[3] != 't') {
315 LOG_INFO(Crypto, 429 LOG_INFO(Crypto, "Attempting to parse ticket with non-standard certificate authority.");
316 "Attempting to parse ticket with non-standard certificate authority {:08X}.",
317 cert_authority);
318 } 430 }
319 431
320 Key128 rights_id; 432 Key128 rights_id = ticket.GetData().rights_id;
321 std::memcpy(rights_id.data(), ticket.data() + 0x2A0, sizeof(Key128));
322 433
323 if (rights_id == Key128{}) 434 if (rights_id == Key128{})
324 return {}; 435 return {};
325 436
326 Key128 key_temp{}; 437 if (!std::any_of(ticket.GetData().title_key_common_pad.begin(),
327 438 ticket.GetData().title_key_common_pad.end(), [](u8 b) { return b != 0; })) {
328 if (!std::any_of(ticket.begin() + 0x190, ticket.begin() + 0x280, [](u8 b) { return b != 0; })) { 439 return std::make_pair(rights_id, ticket.GetData().title_key_common);
329 std::memcpy(key_temp.data(), ticket.data() + 0x180, key_temp.size());
330 return std::make_pair(rights_id, key_temp);
331 } 440 }
332 441
333 mbedtls_mpi D; // RSA Private Exponent 442 mbedtls_mpi D; // RSA Private Exponent
@@ -342,7 +451,7 @@ std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
342 451
343 mbedtls_mpi_read_binary(&D, key.decryption_key.data(), key.decryption_key.size()); 452 mbedtls_mpi_read_binary(&D, key.decryption_key.data(), key.decryption_key.size());
344 mbedtls_mpi_read_binary(&N, key.modulus.data(), key.modulus.size()); 453 mbedtls_mpi_read_binary(&N, key.modulus.data(), key.modulus.size());
345 mbedtls_mpi_read_binary(&S, ticket.data() + 0x180, 0x100); 454 mbedtls_mpi_read_binary(&S, ticket.GetData().title_key_block.data(), 0x100);
346 455
347 mbedtls_mpi_exp_mod(&M, &S, &D, &N, nullptr); 456 mbedtls_mpi_exp_mod(&M, &S, &D, &N, nullptr);
348 457
@@ -366,6 +475,7 @@ std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
366 return {}; 475 return {};
367 ASSERT(*offset > 0); 476 ASSERT(*offset > 0);
368 477
478 Key128 key_temp{};
369 std::memcpy(key_temp.data(), m_2.data() + *offset, key_temp.size()); 479 std::memcpy(key_temp.data(), m_2.data() + *offset, key_temp.size());
370 480
371 return std::make_pair(rights_id, key_temp); 481 return std::make_pair(rights_id, key_temp);
@@ -450,6 +560,8 @@ void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
450 560
451 const auto index = std::stoul(out[0].substr(18, 2), nullptr, 16); 561 const auto index = std::stoul(out[0].substr(18, 2), nullptr, 16);
452 encrypted_keyblobs[index] = Common::HexStringToArray<0xB0>(out[1]); 562 encrypted_keyblobs[index] = Common::HexStringToArray<0xB0>(out[1]);
563 } else if (out[0].compare(0, 20, "eticket_extended_kek") == 0) {
564 eticket_extended_kek = Common::HexStringToArray<576>(out[1]);
453 } else { 565 } else {
454 for (const auto& kv : KEYS_VARIABLE_LENGTH) { 566 for (const auto& kv : KEYS_VARIABLE_LENGTH) {
455 if (!ValidCryptoRevisionString(out[0], kv.second.size(), 2)) 567 if (!ValidCryptoRevisionString(out[0], kv.second.size(), 2))
@@ -862,20 +974,19 @@ void KeyManager::DeriveETicket(PartitionDataManager& data) {
862 // Titlekeys 974 // Titlekeys
863 data.DecryptProdInfo(GetBISKey(0)); 975 data.DecryptProdInfo(GetBISKey(0));
864 976
865 const auto eticket_extended_kek = data.GetETicketExtendedKek(); 977 eticket_extended_kek = data.GetETicketExtendedKek();
978 WriteKeyToFile(KeyCategory::Console, "eticket_extended_kek", eticket_extended_kek);
979 PopulateTickets();
980}
866 981
867 std::vector<u8> extended_iv(0x10); 982void KeyManager::PopulateTickets() {
868 std::memcpy(extended_iv.data(), eticket_extended_kek.data(), extended_iv.size()); 983 const auto rsa_key = GetETicketRSAKey();
869 std::array<u8, 0x230> extended_dec{};
870 AESCipher<Key128> rsa_1(eticket_final, Mode::CTR);
871 rsa_1.SetIV(extended_iv);
872 rsa_1.Transcode(eticket_extended_kek.data() + 0x10, eticket_extended_kek.size() - 0x10,
873 extended_dec.data(), Op::Decrypt);
874 984
875 RSAKeyPair<2048> rsa_key{}; 985 if (rsa_key == RSAKeyPair<2048>{})
876 std::memcpy(rsa_key.decryption_key.data(), extended_dec.data(), rsa_key.decryption_key.size()); 986 return;
877 std::memcpy(rsa_key.modulus.data(), extended_dec.data() + 0x100, rsa_key.modulus.size()); 987
878 std::memcpy(rsa_key.exponent.data(), extended_dec.data() + 0x200, rsa_key.exponent.size()); 988 if (!common_tickets.empty() && !personal_tickets.empty())
989 return;
879 990
880 const FileUtil::IOFile save1(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + 991 const FileUtil::IOFile save1(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) +
881 "/system/save/80000000000000e1", 992 "/system/save/80000000000000e1",
@@ -886,19 +997,41 @@ void KeyManager::DeriveETicket(PartitionDataManager& data) {
886 997
887 const auto blob2 = GetTicketblob(save2); 998 const auto blob2 = GetTicketblob(save2);
888 auto res = GetTicketblob(save1); 999 auto res = GetTicketblob(save1);
1000 const auto idx = res.size();
889 res.insert(res.end(), blob2.begin(), blob2.end()); 1001 res.insert(res.end(), blob2.begin(), blob2.end());
890 1002
891 for (const auto& raw : res) { 1003 for (std::size_t i = 0; i < res.size(); ++i) {
892 const auto pair = ParseTicket(raw, rsa_key); 1004 const auto common = i < idx;
1005 const auto pair = ParseTicket(res[i], rsa_key);
893 if (!pair) 1006 if (!pair)
894 continue; 1007 continue;
895 const auto& [rid, key] = *pair; 1008 const auto& [rid, key] = *pair;
896 u128 rights_id; 1009 u128 rights_id;
897 std::memcpy(rights_id.data(), rid.data(), rid.size()); 1010 std::memcpy(rights_id.data(), rid.data(), rid.size());
1011
1012 if (common) {
1013 common_tickets[rights_id] = res[i];
1014 } else {
1015 personal_tickets[rights_id] = res[i];
1016 }
1017
898 SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]); 1018 SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]);
899 } 1019 }
900} 1020}
901 1021
1022void KeyManager::SynthesizeTickets() {
1023 for (const auto& key : s128_keys) {
1024 if (key.first.type != S128KeyType::Titlekey) {
1025 continue;
1026 }
1027 u128 rights_id{key.first.field1, key.first.field2};
1028 Key128 rights_id_2;
1029 std::memcpy(rights_id_2.data(), rights_id.data(), rights_id_2.size());
1030 const auto ticket = Ticket::SynthesizeCommon(key.second, rights_id_2);
1031 common_tickets.insert_or_assign(rights_id, ticket);
1032 }
1033}
1034
902void KeyManager::SetKeyWrapped(S128KeyType id, Key128 key, u64 field1, u64 field2) { 1035void KeyManager::SetKeyWrapped(S128KeyType id, Key128 key, u64 field1, u64 field2) {
903 if (key == Key128{}) 1036 if (key == Key128{})
904 return; 1037 return;
@@ -997,6 +1130,46 @@ void KeyManager::PopulateFromPartitionData(PartitionDataManager& data) {
997 DeriveBase(); 1130 DeriveBase();
998} 1131}
999 1132
1133const std::map<u128, Ticket>& KeyManager::GetCommonTickets() const {
1134 return common_tickets;
1135}
1136
1137const std::map<u128, Ticket>& KeyManager::GetPersonalizedTickets() const {
1138 return personal_tickets;
1139}
1140
1141bool KeyManager::AddTicketCommon(Ticket raw) {
1142 const auto rsa_key = GetETicketRSAKey();
1143 if (rsa_key == RSAKeyPair<2048>{})
1144 return false;
1145
1146 const auto pair = ParseTicket(raw, rsa_key);
1147 if (!pair)
1148 return false;
1149 const auto& [rid, key] = *pair;
1150 u128 rights_id;
1151 std::memcpy(rights_id.data(), rid.data(), rid.size());
1152 common_tickets[rights_id] = raw;
1153 SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]);
1154 return true;
1155}
1156
1157bool KeyManager::AddTicketPersonalized(Ticket raw) {
1158 const auto rsa_key = GetETicketRSAKey();
1159 if (rsa_key == RSAKeyPair<2048>{})
1160 return false;
1161
1162 const auto pair = ParseTicket(raw, rsa_key);
1163 if (!pair)
1164 return false;
1165 const auto& [rid, key] = *pair;
1166 u128 rights_id;
1167 std::memcpy(rights_id.data(), rid.data(), rid.size());
1168 common_tickets[rights_id] = raw;
1169 SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]);
1170 return true;
1171}
1172
1000const boost::container::flat_map<std::string, KeyIndex<S128KeyType>> KeyManager::s128_file_id = { 1173const boost::container::flat_map<std::string, KeyIndex<S128KeyType>> KeyManager::s128_file_id = {
1001 {"eticket_rsa_kek", {S128KeyType::ETicketRSAKek, 0, 0}}, 1174 {"eticket_rsa_kek", {S128KeyType::ETicketRSAKek, 0, 0}},
1002 {"eticket_rsa_kek_source", 1175 {"eticket_rsa_kek_source",
diff --git a/src/core/crypto/key_manager.h b/src/core/crypto/key_manager.h
index 22f268c65..7265c4171 100644
--- a/src/core/crypto/key_manager.h
+++ b/src/core/crypto/key_manager.h
@@ -9,8 +9,10 @@
9#include <optional> 9#include <optional>
10#include <string> 10#include <string>
11 11
12#include <variant>
12#include <boost/container/flat_map.hpp> 13#include <boost/container/flat_map.hpp>
13#include <fmt/format.h> 14#include <fmt/format.h>
15#include "common/common_funcs.h"
14#include "common/common_types.h" 16#include "common/common_types.h"
15#include "core/crypto/partition_data_manager.h" 17#include "core/crypto/partition_data_manager.h"
16#include "core/file_sys/vfs_types.h" 18#include "core/file_sys/vfs_types.h"
@@ -30,7 +32,79 @@ constexpr u64 TICKET_FILE_TITLEKEY_OFFSET = 0x180;
30using Key128 = std::array<u8, 0x10>; 32using Key128 = std::array<u8, 0x10>;
31using Key256 = std::array<u8, 0x20>; 33using Key256 = std::array<u8, 0x20>;
32using SHA256Hash = std::array<u8, 0x20>; 34using SHA256Hash = std::array<u8, 0x20>;
33using TicketRaw = std::array<u8, 0x400>; 35
36enum class SignatureType {
37 RSA_4096_SHA1 = 0x10000,
38 RSA_2048_SHA1 = 0x10001,
39 ECDSA_SHA1 = 0x10002,
40 RSA_4096_SHA256 = 0x10003,
41 RSA_2048_SHA256 = 0x10004,
42 ECDSA_SHA256 = 0x10005,
43};
44
45u64 GetSignatureTypeDataSize(SignatureType type);
46u64 GetSignatureTypePaddingSize(SignatureType type);
47
48enum class TitleKeyType : u8 {
49 Common = 0,
50 Personalized = 1,
51};
52
53struct TicketData {
54 std::array<u8, 0x40> issuer;
55 union {
56 std::array<u8, 0x100> title_key_block;
57
58 struct {
59 Key128 title_key_common;
60 std::array<u8, 0xF0> title_key_common_pad;
61 };
62 };
63
64 INSERT_PADDING_BYTES(0x1);
65 TitleKeyType type;
66 INSERT_PADDING_BYTES(0x3);
67 u8 revision;
68 INSERT_PADDING_BYTES(0xA);
69 u64 ticket_id;
70 u64 device_id;
71 std::array<u8, 0x10> rights_id;
72 u32 account_id;
73 INSERT_PADDING_BYTES(0x14C);
74};
75static_assert(sizeof(TicketData) == 0x2C0, "TicketData has incorrect size.");
76
77struct RSA4096Ticket {
78 SignatureType sig_type;
79 std::array<u8, 0x200> sig_data;
80 INSERT_PADDING_BYTES(0x3C);
81 TicketData data;
82};
83
84struct RSA2048Ticket {
85 SignatureType sig_type;
86 std::array<u8, 0x100> sig_data;
87 INSERT_PADDING_BYTES(0x3C);
88 TicketData data;
89};
90
91struct ECDSATicket {
92 SignatureType sig_type;
93 std::array<u8, 0x3C> sig_data;
94 INSERT_PADDING_BYTES(0x40);
95 TicketData data;
96};
97
98struct Ticket {
99 std::variant<RSA4096Ticket, RSA2048Ticket, ECDSATicket> data;
100
101 SignatureType GetSignatureType() const;
102 TicketData& GetData();
103 const TicketData& GetData() const;
104 u64 GetSize() const;
105
106 static Ticket SynthesizeCommon(Key128 title_key, const std::array<u8, 0x10>& rights_id);
107};
34 108
35static_assert(sizeof(Key128) == 16, "Key128 must be 128 bytes big."); 109static_assert(sizeof(Key128) == 16, "Key128 must be 128 bytes big.");
36static_assert(sizeof(Key256) == 32, "Key256 must be 256 bytes big."); 110static_assert(sizeof(Key256) == 32, "Key256 must be 256 bytes big.");
@@ -43,6 +117,19 @@ struct RSAKeyPair {
43 std::array<u8, 4> exponent; 117 std::array<u8, 4> exponent;
44}; 118};
45 119
120template <size_t bit_size, size_t byte_size>
121bool operator==(const RSAKeyPair<bit_size, byte_size>& lhs,
122 const RSAKeyPair<bit_size, byte_size>& rhs) {
123 return std::tie(lhs.encryption_key, lhs.decryption_key, lhs.modulus, lhs.exponent) ==
124 std::tie(rhs.encryption_key, rhs.decryption_key, rhs.modulus, rhs.exponent);
125}
126
127template <size_t bit_size, size_t byte_size>
128bool operator!=(const RSAKeyPair<bit_size, byte_size>& lhs,
129 const RSAKeyPair<bit_size, byte_size>& rhs) {
130 return !(lhs == rhs);
131}
132
46enum class KeyCategory : u8 { 133enum class KeyCategory : u8 {
47 Standard, 134 Standard,
48 Title, 135 Title,
@@ -151,22 +238,35 @@ public:
151 238
152 static bool KeyFileExists(bool title); 239 static bool KeyFileExists(bool title);
153 240
154 // Call before using the sd seed to attempt to derive it if it dosen't exist. Needs system save 241 // Call before using the sd seed to attempt to derive it if it dosen't exist. Needs system
155 // 8*43 and the private file to exist. 242 // save 8*43 and the private file to exist.
156 void DeriveSDSeedLazy(); 243 void DeriveSDSeedLazy();
157 244
158 bool BaseDeriveNecessary() const; 245 bool BaseDeriveNecessary() const;
159 void DeriveBase(); 246 void DeriveBase();
160 void DeriveETicket(PartitionDataManager& data); 247 void DeriveETicket(PartitionDataManager& data);
248 void PopulateTickets();
249 void SynthesizeTickets();
161 250
162 void PopulateFromPartitionData(PartitionDataManager& data); 251 void PopulateFromPartitionData(PartitionDataManager& data);
163 252
253 const std::map<u128, Ticket>& GetCommonTickets() const;
254 const std::map<u128, Ticket>& GetPersonalizedTickets() const;
255
256 bool AddTicketCommon(Ticket raw);
257 bool AddTicketPersonalized(Ticket raw);
258
164private: 259private:
165 std::map<KeyIndex<S128KeyType>, Key128> s128_keys; 260 std::map<KeyIndex<S128KeyType>, Key128> s128_keys;
166 std::map<KeyIndex<S256KeyType>, Key256> s256_keys; 261 std::map<KeyIndex<S256KeyType>, Key256> s256_keys;
167 262
263 // Map from rights ID to ticket
264 std::map<u128, Ticket> common_tickets;
265 std::map<u128, Ticket> personal_tickets;
266
168 std::array<std::array<u8, 0xB0>, 0x20> encrypted_keyblobs{}; 267 std::array<std::array<u8, 0xB0>, 0x20> encrypted_keyblobs{};
169 std::array<std::array<u8, 0x90>, 0x20> keyblobs{}; 268 std::array<std::array<u8, 0x90>, 0x20> keyblobs{};
269 std::array<u8, 576> eticket_extended_kek{};
170 270
171 bool dev_mode; 271 bool dev_mode;
172 void LoadFromFile(const std::string& filename, bool is_title_keys); 272 void LoadFromFile(const std::string& filename, bool is_title_keys);
@@ -178,6 +278,8 @@ private:
178 278
179 void DeriveGeneralPurposeKeys(std::size_t crypto_revision); 279 void DeriveGeneralPurposeKeys(std::size_t crypto_revision);
180 280
281 RSAKeyPair<2048> GetETicketRSAKey() const;
282
181 void SetKeyWrapped(S128KeyType id, Key128 key, u64 field1 = 0, u64 field2 = 0); 283 void SetKeyWrapped(S128KeyType id, Key128 key, u64 field1 = 0, u64 field2 = 0);
182 void SetKeyWrapped(S256KeyType id, Key256 key, u64 field1 = 0, u64 field2 = 0); 284 void SetKeyWrapped(S256KeyType id, Key256 key, u64 field1 = 0, u64 field2 = 0);
183 285
@@ -195,11 +297,11 @@ std::array<u8, 0x90> DecryptKeyblob(const std::array<u8, 0xB0>& encrypted_keyblo
195std::optional<Key128> DeriveSDSeed(); 297std::optional<Key128> DeriveSDSeed();
196Loader::ResultStatus DeriveSDKeys(std::array<Key256, 2>& sd_keys, KeyManager& keys); 298Loader::ResultStatus DeriveSDKeys(std::array<Key256, 2>& sd_keys, KeyManager& keys);
197 299
198std::vector<TicketRaw> GetTicketblob(const FileUtil::IOFile& ticket_save); 300std::vector<Ticket> GetTicketblob(const FileUtil::IOFile& ticket_save);
199 301
200// Returns a pair of {rights_id, titlekey}. Fails if the ticket has no certificate authority (offset 302// Returns a pair of {rights_id, titlekey}. Fails if the ticket has no certificate authority
201// 0x140-0x144 is zero) 303// (offset 0x140-0x144 is zero)
202std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket, 304std::optional<std::pair<Key128, Key128>> ParseTicket(const Ticket& ticket,
203 const RSAKeyPair<2048>& eticket_extended_key); 305 const RSAKeyPair<2048>& eticket_extended_key);
204 306
205} // namespace Core::Crypto 307} // namespace Core::Crypto
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index eb76174c5..7310b3602 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -94,6 +94,10 @@ u64 ProgramMetadata::GetFilesystemPermissions() const {
94 return aci_file_access.permissions; 94 return aci_file_access.permissions;
95} 95}
96 96
97u32 ProgramMetadata::GetSystemResourceSize() const {
98 return npdm_header.system_resource_size;
99}
100
97const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const { 101const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const {
98 return aci_kernel_capabilities; 102 return aci_kernel_capabilities;
99} 103}
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 43bf2820a..88ec97d85 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -58,6 +58,7 @@ public:
58 u32 GetMainThreadStackSize() const; 58 u32 GetMainThreadStackSize() const;
59 u64 GetTitleID() const; 59 u64 GetTitleID() const;
60 u64 GetFilesystemPermissions() const; 60 u64 GetFilesystemPermissions() const;
61 u32 GetSystemResourceSize() const;
61 const KernelCapabilityDescriptors& GetKernelCapabilities() const; 62 const KernelCapabilityDescriptors& GetKernelCapabilities() const;
62 63
63 void Print() const; 64 void Print() const;
@@ -76,7 +77,8 @@ private:
76 u8 reserved_3; 77 u8 reserved_3;
77 u8 main_thread_priority; 78 u8 main_thread_priority;
78 u8 main_thread_cpu; 79 u8 main_thread_cpu;
79 std::array<u8, 8> reserved_4; 80 std::array<u8, 4> reserved_4;
81 u32_le system_resource_size;
80 u32_le process_category; 82 u32_le process_category;
81 u32_le main_stack_size; 83 u32_le main_stack_size;
82 std::array<u8, 0x10> application_name; 84 std::array<u8, 0x10> application_name;
diff --git a/src/core/file_sys/system_archive/mii_model.cpp b/src/core/file_sys/system_archive/mii_model.cpp
new file mode 100644
index 000000000..6a9add87c
--- /dev/null
+++ b/src/core/file_sys/system_archive/mii_model.cpp
@@ -0,0 +1,46 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/file_sys/system_archive/mii_model.h"
6#include "core/file_sys/vfs_vector.h"
7
8namespace FileSys::SystemArchive {
9
10namespace MiiModelData {
11
12constexpr std::array<u8, 0x10> NFTR_STANDARD{'N', 'F', 'T', 'R', 0x01, 0x00, 0x00, 0x00,
13 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
14constexpr std::array<u8, 0x10> NFSR_STANDARD{'N', 'F', 'S', 'R', 0x01, 0x00, 0x00, 0x00,
15 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
16
17constexpr auto TEXTURE_LOW_LINEAR = NFTR_STANDARD;
18constexpr auto TEXTURE_LOW_SRGB = NFTR_STANDARD;
19constexpr auto TEXTURE_MID_LINEAR = NFTR_STANDARD;
20constexpr auto TEXTURE_MID_SRGB = NFTR_STANDARD;
21constexpr auto SHAPE_HIGH = NFSR_STANDARD;
22constexpr auto SHAPE_MID = NFSR_STANDARD;
23
24} // namespace MiiModelData
25
26VirtualDir MiiModel() {
27 auto out = std::make_shared<VectorVfsDirectory>(std::vector<VirtualFile>{},
28 std::vector<VirtualDir>{}, "data");
29
30 out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::TEXTURE_LOW_LINEAR.size()>>(
31 MiiModelData::TEXTURE_LOW_LINEAR, "NXTextureLowLinear.dat"));
32 out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::TEXTURE_LOW_SRGB.size()>>(
33 MiiModelData::TEXTURE_LOW_SRGB, "NXTextureLowSRGB.dat"));
34 out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::TEXTURE_MID_LINEAR.size()>>(
35 MiiModelData::TEXTURE_MID_LINEAR, "NXTextureMidLinear.dat"));
36 out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::TEXTURE_MID_SRGB.size()>>(
37 MiiModelData::TEXTURE_MID_SRGB, "NXTextureMidSRGB.dat"));
38 out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_HIGH.size()>>(
39 MiiModelData::SHAPE_HIGH, "ShapeHigh.dat"));
40 out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_MID.size()>>(
41 MiiModelData::SHAPE_MID, "ShapeMid.dat"));
42
43 return std::move(out);
44}
45
46} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/mii_model.h b/src/core/file_sys/system_archive/mii_model.h
new file mode 100644
index 000000000..6c2d9398b
--- /dev/null
+++ b/src/core/file_sys/system_archive/mii_model.h
@@ -0,0 +1,13 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/file_sys/vfs_types.h"
8
9namespace FileSys::SystemArchive {
10
11VirtualDir MiiModel();
12
13} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/system_archive.cpp b/src/core/file_sys/system_archive/system_archive.cpp
index c9722ed77..6d8445383 100644
--- a/src/core/file_sys/system_archive/system_archive.cpp
+++ b/src/core/file_sys/system_archive/system_archive.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/file_sys/romfs.h" 6#include "core/file_sys/romfs.h"
7#include "core/file_sys/system_archive/mii_model.h"
7#include "core/file_sys/system_archive/ng_word.h" 8#include "core/file_sys/system_archive/ng_word.h"
8#include "core/file_sys/system_archive/system_archive.h" 9#include "core/file_sys/system_archive/system_archive.h"
9#include "core/file_sys/system_archive/system_version.h" 10#include "core/file_sys/system_archive/system_version.h"
@@ -24,7 +25,7 @@ struct SystemArchiveDescriptor {
24constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHIVES{{ 25constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHIVES{{
25 {0x0100000000000800, "CertStore", nullptr}, 26 {0x0100000000000800, "CertStore", nullptr},
26 {0x0100000000000801, "ErrorMessage", nullptr}, 27 {0x0100000000000801, "ErrorMessage", nullptr},
27 {0x0100000000000802, "MiiModel", nullptr}, 28 {0x0100000000000802, "MiiModel", &MiiModel},
28 {0x0100000000000803, "BrowserDll", nullptr}, 29 {0x0100000000000803, "BrowserDll", nullptr},
29 {0x0100000000000804, "Help", nullptr}, 30 {0x0100000000000804, "Help", nullptr},
30 {0x0100000000000805, "SharedFont", nullptr}, 31 {0x0100000000000805, "SharedFont", nullptr},
diff --git a/src/core/hardware_interrupt_manager.cpp b/src/core/hardware_interrupt_manager.cpp
new file mode 100644
index 000000000..c2115db2d
--- /dev/null
+++ b/src/core/hardware_interrupt_manager.cpp
@@ -0,0 +1,30 @@
1// Copyright 2019 Yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/core.h"
6#include "core/core_timing.h"
7#include "core/hardware_interrupt_manager.h"
8#include "core/hle/service/nvdrv/interface.h"
9#include "core/hle/service/sm/sm.h"
10
11namespace Core::Hardware {
12
13InterruptManager::InterruptManager(Core::System& system_in) : system(system_in) {
14 gpu_interrupt_event =
15 system.CoreTiming().RegisterEvent("GPUInterrupt", [this](u64 message, s64) {
16 auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv");
17 const u32 syncpt = static_cast<u32>(message >> 32);
18 const u32 value = static_cast<u32>(message);
19 nvdrv->SignalGPUInterruptSyncpt(syncpt, value);
20 });
21}
22
23InterruptManager::~InterruptManager() = default;
24
25void InterruptManager::GPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
26 const u64 msg = (static_cast<u64>(syncpoint_id) << 32ULL) | value;
27 system.CoreTiming().ScheduleEvent(10, gpu_interrupt_event, msg);
28}
29
30} // namespace Core::Hardware
diff --git a/src/core/hardware_interrupt_manager.h b/src/core/hardware_interrupt_manager.h
new file mode 100644
index 000000000..494db883a
--- /dev/null
+++ b/src/core/hardware_interrupt_manager.h
@@ -0,0 +1,31 @@
1// Copyright 2019 Yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Core {
10class System;
11}
12
13namespace Core::Timing {
14struct EventType;
15}
16
17namespace Core::Hardware {
18
19class InterruptManager {
20public:
21 explicit InterruptManager(Core::System& system);
22 ~InterruptManager();
23
24 void GPUInterruptSyncpt(u32 syncpoint_id, u32 value);
25
26private:
27 Core::System& system;
28 Core::Timing::EventType* gpu_interrupt_event{};
29};
30
31} // namespace Core::Hardware
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
index 879957dcb..d8ad54030 100644
--- a/src/core/hle/kernel/code_set.h
+++ b/src/core/hle/kernel/code_set.h
@@ -8,6 +8,7 @@
8#include <vector> 8#include <vector>
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/hle/kernel/physical_memory.h"
11 12
12namespace Kernel { 13namespace Kernel {
13 14
@@ -77,7 +78,7 @@ struct CodeSet final {
77 } 78 }
78 79
79 /// The overall data that backs this code set. 80 /// The overall data that backs this code set.
80 std::vector<u8> memory; 81 Kernel::PhysicalMemory memory;
81 82
82 /// The segments that comprise this code set. 83 /// The segments that comprise this code set.
83 std::array<Segment, 3> segments; 84 std::array<Segment, 3> segments;
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h
new file mode 100644
index 000000000..090565310
--- /dev/null
+++ b/src/core/hle/kernel/physical_memory.h
@@ -0,0 +1,19 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/alignment.h"
8
9namespace Kernel {
10
11// This encapsulation serves 2 purposes:
12// - First, to encapsulate host physical memory under a single type and set an
13// standard for managing it.
14// - Second to ensure all host backing memory used is aligned to 256 bytes due
15// to strict alignment restrictions on GPU memory.
16
17using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
18
19} // namespace Kernel
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 7cfc513a1..e80a12ac3 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <bitset>
6#include <memory> 7#include <memory>
7#include <random> 8#include <random>
8#include "common/alignment.h" 9#include "common/alignment.h"
@@ -48,8 +49,58 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) {
48} 49}
49} // Anonymous namespace 50} // Anonymous namespace
50 51
51SharedPtr<Process> Process::Create(Core::System& system, std::string name, 52// Represents a page used for thread-local storage.
52 Process::ProcessType type) { 53//
54// Each TLS page contains slots that may be used by processes and threads.
55// Every process and thread is created with a slot in some arbitrary page
56// (whichever page happens to have an available slot).
57class TLSPage {
58public:
59 static constexpr std::size_t num_slot_entries = Memory::PAGE_SIZE / Memory::TLS_ENTRY_SIZE;
60
61 explicit TLSPage(VAddr address) : base_address{address} {}
62
63 bool HasAvailableSlots() const {
64 return !is_slot_used.all();
65 }
66
67 VAddr GetBaseAddress() const {
68 return base_address;
69 }
70
71 std::optional<VAddr> ReserveSlot() {
72 for (std::size_t i = 0; i < is_slot_used.size(); i++) {
73 if (is_slot_used[i]) {
74 continue;
75 }
76
77 is_slot_used[i] = true;
78 return base_address + (i * Memory::TLS_ENTRY_SIZE);
79 }
80
81 return std::nullopt;
82 }
83
84 void ReleaseSlot(VAddr address) {
85 // Ensure that all given addresses are consistent with how TLS pages
86 // are intended to be used when releasing slots.
87 ASSERT(IsWithinPage(address));
88 ASSERT((address % Memory::TLS_ENTRY_SIZE) == 0);
89
90 const std::size_t index = (address - base_address) / Memory::TLS_ENTRY_SIZE;
91 is_slot_used[index] = false;
92 }
93
94private:
95 bool IsWithinPage(VAddr address) const {
96 return base_address <= address && address < base_address + Memory::PAGE_SIZE;
97 }
98
99 VAddr base_address;
100 std::bitset<num_slot_entries> is_slot_used;
101};
102
103SharedPtr<Process> Process::Create(Core::System& system, std::string name, ProcessType type) {
53 auto& kernel = system.Kernel(); 104 auto& kernel = system.Kernel();
54 105
55 SharedPtr<Process> process(new Process(system)); 106 SharedPtr<Process> process(new Process(system));
@@ -78,20 +129,17 @@ u64 Process::GetTotalPhysicalMemoryAvailable() const {
78 return vm_manager.GetTotalPhysicalMemoryAvailable(); 129 return vm_manager.GetTotalPhysicalMemoryAvailable();
79} 130}
80 131
81u64 Process::GetTotalPhysicalMemoryAvailableWithoutMmHeap() const { 132u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
82 // TODO: Subtract the personal heap size from this when the 133 return GetTotalPhysicalMemoryAvailable() - GetSystemResourceSize();
83 // personal heap is implemented.
84 return GetTotalPhysicalMemoryAvailable();
85} 134}
86 135
87u64 Process::GetTotalPhysicalMemoryUsed() const { 136u64 Process::GetTotalPhysicalMemoryUsed() const {
88 return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size; 137 return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size +
138 GetSystemResourceUsage();
89} 139}
90 140
91u64 Process::GetTotalPhysicalMemoryUsedWithoutMmHeap() const { 141u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
92 // TODO: Subtract the personal heap size from this when the 142 return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage();
93 // personal heap is implemented.
94 return GetTotalPhysicalMemoryUsed();
95} 143}
96 144
97void Process::RegisterThread(const Thread* thread) { 145void Process::RegisterThread(const Thread* thread) {
@@ -121,6 +169,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
121 program_id = metadata.GetTitleID(); 169 program_id = metadata.GetTitleID();
122 ideal_core = metadata.GetMainThreadCore(); 170 ideal_core = metadata.GetMainThreadCore();
123 is_64bit_process = metadata.Is64BitProgram(); 171 is_64bit_process = metadata.Is64BitProgram();
172 system_resource_size = metadata.GetSystemResourceSize();
124 173
125 vm_manager.Reset(metadata.GetAddressSpaceType()); 174 vm_manager.Reset(metadata.GetAddressSpaceType());
126 175
@@ -135,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
135} 184}
136 185
137void Process::Run(s32 main_thread_priority, u64 stack_size) { 186void Process::Run(s32 main_thread_priority, u64 stack_size) {
138 // The kernel always ensures that the given stack size is page aligned. 187 AllocateMainThreadStack(stack_size);
139 main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); 188 tls_region_address = CreateTLSRegion();
140
141 // Allocate and map the main thread stack
142 // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
143 // of the user address space.
144 const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
145 vm_manager
146 .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
147 0, main_thread_stack_size, MemoryState::Stack)
148 .Unwrap();
149 189
150 vm_manager.LogLayout(); 190 vm_manager.LogLayout();
191
151 ChangeStatus(ProcessStatus::Running); 192 ChangeStatus(ProcessStatus::Running);
152 193
153 SetupMainThread(*this, kernel, main_thread_priority); 194 SetupMainThread(*this, kernel, main_thread_priority);
@@ -177,69 +218,66 @@ void Process::PrepareForTermination() {
177 stop_threads(system.Scheduler(2).GetThreadList()); 218 stop_threads(system.Scheduler(2).GetThreadList());
178 stop_threads(system.Scheduler(3).GetThreadList()); 219 stop_threads(system.Scheduler(3).GetThreadList());
179 220
221 FreeTLSRegion(tls_region_address);
222 tls_region_address = 0;
223
180 ChangeStatus(ProcessStatus::Exited); 224 ChangeStatus(ProcessStatus::Exited);
181} 225}
182 226
183/** 227/**
184 * Finds a free location for the TLS section of a thread. 228 * Attempts to find a TLS page that contains a free slot for
185 * @param tls_slots The TLS page array of the thread's owner process. 229 * use by a thread.
186 * Returns a tuple of (page, slot, alloc_needed) where: 230 *
187 * page: The index of the first allocated TLS page that has free slots. 231 * @returns If a page with an available slot is found, then an iterator
188 * slot: The index of the first free slot in the indicated page. 232 * pointing to the page is returned. Otherwise the end iterator
189 * alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full). 233 * is returned instead.
190 */ 234 */
191static std::tuple<std::size_t, std::size_t, bool> FindFreeThreadLocalSlot( 235static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {
192 const std::vector<std::bitset<8>>& tls_slots) { 236 return std::find_if(tls_pages.begin(), tls_pages.end(),
193 // Iterate over all the allocated pages, and try to find one where not all slots are used. 237 [](const auto& page) { return page.HasAvailableSlots(); });
194 for (std::size_t page = 0; page < tls_slots.size(); ++page) {
195 const auto& page_tls_slots = tls_slots[page];
196 if (!page_tls_slots.all()) {
197 // We found a page with at least one free slot, find which slot it is
198 for (std::size_t slot = 0; slot < page_tls_slots.size(); ++slot) {
199 if (!page_tls_slots.test(slot)) {
200 return std::make_tuple(page, slot, false);
201 }
202 }
203 }
204 }
205
206 return std::make_tuple(0, 0, true);
207} 238}
208 239
209VAddr Process::MarkNextAvailableTLSSlotAsUsed(Thread& thread) { 240VAddr Process::CreateTLSRegion() {
210 auto [available_page, available_slot, needs_allocation] = FindFreeThreadLocalSlot(tls_slots); 241 auto tls_page_iter = FindTLSPageWithAvailableSlots(tls_pages);
211 const VAddr tls_begin = vm_manager.GetTLSIORegionBaseAddress();
212 242
213 if (needs_allocation) { 243 if (tls_page_iter == tls_pages.cend()) {
214 tls_slots.emplace_back(0); // The page is completely available at the start 244 const auto region_address =
215 available_page = tls_slots.size() - 1; 245 vm_manager.FindFreeRegion(vm_manager.GetTLSIORegionBaseAddress(),
216 available_slot = 0; // Use the first slot in the new page 246 vm_manager.GetTLSIORegionEndAddress(), Memory::PAGE_SIZE);
247 ASSERT(region_address.Succeeded());
217 248
218 // Allocate some memory from the end of the linear heap for this region. 249 const auto map_result = vm_manager.MapMemoryBlock(
219 auto& tls_memory = thread.GetTLSMemory(); 250 *region_address, std::make_shared<PhysicalMemory>(Memory::PAGE_SIZE), 0,
220 tls_memory->insert(tls_memory->end(), Memory::PAGE_SIZE, 0); 251 Memory::PAGE_SIZE, MemoryState::ThreadLocal);
252 ASSERT(map_result.Succeeded());
221 253
222 vm_manager.RefreshMemoryBlockMappings(tls_memory.get()); 254 tls_pages.emplace_back(*region_address);
223 255
224 vm_manager.MapMemoryBlock(tls_begin + available_page * Memory::PAGE_SIZE, tls_memory, 0, 256 const auto reserve_result = tls_pages.back().ReserveSlot();
225 Memory::PAGE_SIZE, MemoryState::ThreadLocal); 257 ASSERT(reserve_result.has_value());
226 }
227 258
228 tls_slots[available_page].set(available_slot); 259 return *reserve_result;
260 }
229 261
230 return tls_begin + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE; 262 return *tls_page_iter->ReserveSlot();
231} 263}
232 264
233void Process::FreeTLSSlot(VAddr tls_address) { 265void Process::FreeTLSRegion(VAddr tls_address) {
234 const VAddr tls_base = tls_address - vm_manager.GetTLSIORegionBaseAddress(); 266 const VAddr aligned_address = Common::AlignDown(tls_address, Memory::PAGE_SIZE);
235 const VAddr tls_page = tls_base / Memory::PAGE_SIZE; 267 auto iter =
236 const VAddr tls_slot = (tls_base % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE; 268 std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) {
269 return page.GetBaseAddress() == aligned_address;
270 });
237 271
238 tls_slots[tls_page].reset(tls_slot); 272 // Something has gone very wrong if we're freeing a region
273 // with no actual page available.
274 ASSERT(iter != tls_pages.cend());
275
276 iter->ReleaseSlot(tls_address);
239} 277}
240 278
241void Process::LoadModule(CodeSet module_, VAddr base_addr) { 279void Process::LoadModule(CodeSet module_, VAddr base_addr) {
242 const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory)); 280 const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
243 281
244 const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, 282 const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
245 MemoryState memory_state) { 283 MemoryState memory_state) {
@@ -282,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) {
282 WakeupAllWaitingThreads(); 320 WakeupAllWaitingThreads();
283} 321}
284 322
323void Process::AllocateMainThreadStack(u64 stack_size) {
324 // The kernel always ensures that the given stack size is page aligned.
325 main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
326
327 // Allocate and map the main thread stack
328 const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
329 vm_manager
330 .MapMemoryBlock(mapping_address, std::make_shared<PhysicalMemory>(main_thread_stack_size),
331 0, main_thread_stack_size, MemoryState::Stack)
332 .Unwrap();
333}
334
285} // namespace Kernel 335} // namespace Kernel
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 248fd3840..c2df451f3 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <bitset>
9#include <cstddef> 8#include <cstddef>
10#include <list> 9#include <list>
11#include <string> 10#include <string>
@@ -32,6 +31,7 @@ namespace Kernel {
32class KernelCore; 31class KernelCore;
33class ResourceLimit; 32class ResourceLimit;
34class Thread; 33class Thread;
34class TLSPage;
35 35
36struct CodeSet; 36struct CodeSet;
37 37
@@ -135,6 +135,11 @@ public:
135 return mutex; 135 return mutex;
136 } 136 }
137 137
138 /// Gets the address to the process' dedicated TLS region.
139 VAddr GetTLSRegionAddress() const {
140 return tls_region_address;
141 }
142
138 /// Gets the current status of the process 143 /// Gets the current status of the process
139 ProcessStatus GetStatus() const { 144 ProcessStatus GetStatus() const {
140 return status; 145 return status;
@@ -168,8 +173,24 @@ public:
168 return capabilities.GetPriorityMask(); 173 return capabilities.GetPriorityMask();
169 } 174 }
170 175
171 u32 IsVirtualMemoryEnabled() const { 176 /// Gets the amount of secure memory to allocate for memory management.
172 return is_virtual_address_memory_enabled; 177 u32 GetSystemResourceSize() const {
178 return system_resource_size;
179 }
180
181 /// Gets the amount of secure memory currently in use for memory management.
182 u32 GetSystemResourceUsage() const {
183 // On hardware, this returns the amount of system resource memory that has
184 // been used by the kernel. This is problematic for Yuzu to emulate, because
185 // system resource memory is used for page tables -- and yuzu doesn't really
186 // have a way to calculate how much memory is required for page tables for
187 // the current process at any given time.
188 // TODO: Is this even worth implementing? Games may retrieve this value via
189 // an SDK function that gets used + available system resource size for debug
190 // or diagnostic purposes. However, it seems unlikely that a game would make
191 // decisions based on how much system memory is dedicated to its page tables.
192 // Is returning a value other than zero wise?
193 return 0;
173 } 194 }
174 195
175 /// Whether this process is an AArch64 or AArch32 process. 196 /// Whether this process is an AArch64 or AArch32 process.
@@ -196,15 +217,15 @@ public:
196 u64 GetTotalPhysicalMemoryAvailable() const; 217 u64 GetTotalPhysicalMemoryAvailable() const;
197 218
198 /// Retrieves the total physical memory available to this process in bytes, 219 /// Retrieves the total physical memory available to this process in bytes,
199 /// without the size of the personal heap added to it. 220 /// without the size of the personal system resource heap added to it.
200 u64 GetTotalPhysicalMemoryAvailableWithoutMmHeap() const; 221 u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource() const;
201 222
202 /// Retrieves the total physical memory used by this process in bytes. 223 /// Retrieves the total physical memory used by this process in bytes.
203 u64 GetTotalPhysicalMemoryUsed() const; 224 u64 GetTotalPhysicalMemoryUsed() const;
204 225
205 /// Retrieves the total physical memory used by this process in bytes, 226 /// Retrieves the total physical memory used by this process in bytes,
206 /// without the size of the personal heap added to it. 227 /// without the size of the personal system resource heap added to it.
207 u64 GetTotalPhysicalMemoryUsedWithoutMmHeap() const; 228 u64 GetTotalPhysicalMemoryUsedWithoutSystemResource() const;
208 229
209 /// Gets the list of all threads created with this process as their owner. 230 /// Gets the list of all threads created with this process as their owner.
210 const std::list<const Thread*>& GetThreadList() const { 231 const std::list<const Thread*>& GetThreadList() const {
@@ -260,10 +281,10 @@ public:
260 // Thread-local storage management 281 // Thread-local storage management
261 282
262 // Marks the next available region as used and returns the address of the slot. 283 // Marks the next available region as used and returns the address of the slot.
263 VAddr MarkNextAvailableTLSSlotAsUsed(Thread& thread); 284 [[nodiscard]] VAddr CreateTLSRegion();
264 285
265 // Frees a used TLS slot identified by the given address 286 // Frees a used TLS slot identified by the given address
266 void FreeTLSSlot(VAddr tls_address); 287 void FreeTLSRegion(VAddr tls_address);
267 288
268private: 289private:
269 explicit Process(Core::System& system); 290 explicit Process(Core::System& system);
@@ -280,6 +301,9 @@ private:
280 /// a process signal. 301 /// a process signal.
281 void ChangeStatus(ProcessStatus new_status); 302 void ChangeStatus(ProcessStatus new_status);
282 303
304 /// Allocates the main thread stack for the process, given the stack size in bytes.
305 void AllocateMainThreadStack(u64 stack_size);
306
283 /// Memory manager for this process. 307 /// Memory manager for this process.
284 Kernel::VMManager vm_manager; 308 Kernel::VMManager vm_manager;
285 309
@@ -290,7 +314,7 @@ private:
290 u64 code_memory_size = 0; 314 u64 code_memory_size = 0;
291 315
292 /// Current status of the process 316 /// Current status of the process
293 ProcessStatus status; 317 ProcessStatus status{};
294 318
295 /// The ID of this process 319 /// The ID of this process
296 u64 process_id = 0; 320 u64 process_id = 0;
@@ -298,19 +322,23 @@ private:
298 /// Title ID corresponding to the process 322 /// Title ID corresponding to the process
299 u64 program_id = 0; 323 u64 program_id = 0;
300 324
325 /// Specifies additional memory to be reserved for the process's memory management by the
326 /// system. When this is non-zero, secure memory is allocated and used for page table allocation
327 /// instead of using the normal global page tables/memory block management.
328 u32 system_resource_size = 0;
329
301 /// Resource limit descriptor for this process 330 /// Resource limit descriptor for this process
302 SharedPtr<ResourceLimit> resource_limit; 331 SharedPtr<ResourceLimit> resource_limit;
303 332
304 /// The ideal CPU core for this process, threads are scheduled on this core by default. 333 /// The ideal CPU core for this process, threads are scheduled on this core by default.
305 u8 ideal_core = 0; 334 u8 ideal_core = 0;
306 u32 is_virtual_address_memory_enabled = 0;
307 335
308 /// The Thread Local Storage area is allocated as processes create threads, 336 /// The Thread Local Storage area is allocated as processes create threads,
309 /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part 337 /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
310 /// holds the TLS for a specific thread. This vector contains which parts are in use for each 338 /// holds the TLS for a specific thread. This vector contains which parts are in use for each
311 /// page as a bitmask. 339 /// page as a bitmask.
312 /// This vector will grow as more pages are allocated for new threads. 340 /// This vector will grow as more pages are allocated for new threads.
313 std::vector<std::bitset<8>> tls_slots; 341 std::vector<TLSPage> tls_pages;
314 342
315 /// Contains the parsed process capability descriptors. 343 /// Contains the parsed process capability descriptors.
316 ProcessCapabilities capabilities; 344 ProcessCapabilities capabilities;
@@ -338,8 +366,11 @@ private:
338 /// variable related facilities. 366 /// variable related facilities.
339 Mutex mutex; 367 Mutex mutex;
340 368
369 /// Address indicating the location of the process' dedicated TLS region.
370 VAddr tls_region_address = 0;
371
341 /// Random values for svcGetInfo RandomEntropy 372 /// Random values for svcGetInfo RandomEntropy
342 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy; 373 std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{};
343 374
344 /// List of threads that are running with this process as their owner. 375 /// List of threads that are running with this process as their owner.
345 std::list<const Thread*> thread_list; 376 std::list<const Thread*> thread_list;
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index f15c5ee36..a815c4eea 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -28,7 +28,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
28 shared_memory->other_permissions = other_permissions; 28 shared_memory->other_permissions = other_permissions;
29 29
30 if (address == 0) { 30 if (address == 0) {
31 shared_memory->backing_block = std::make_shared<std::vector<u8>>(size); 31 shared_memory->backing_block = std::make_shared<Kernel::PhysicalMemory>(size);
32 shared_memory->backing_block_offset = 0; 32 shared_memory->backing_block_offset = 0;
33 33
34 // Refresh the address mappings for the current process. 34 // Refresh the address mappings for the current process.
@@ -59,8 +59,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
59} 59}
60 60
61SharedPtr<SharedMemory> SharedMemory::CreateForApplet( 61SharedPtr<SharedMemory> SharedMemory::CreateForApplet(
62 KernelCore& kernel, std::shared_ptr<std::vector<u8>> heap_block, std::size_t offset, u64 size, 62 KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset,
63 MemoryPermission permissions, MemoryPermission other_permissions, std::string name) { 63 u64 size, MemoryPermission permissions, MemoryPermission other_permissions, std::string name) {
64 SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel)); 64 SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel));
65 65
66 shared_memory->owner_process = nullptr; 66 shared_memory->owner_process = nullptr;
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index c2b6155e1..01ca6dcd2 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -10,6 +10,7 @@
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "core/hle/kernel/object.h" 12#include "core/hle/kernel/object.h"
13#include "core/hle/kernel/physical_memory.h"
13#include "core/hle/kernel/process.h" 14#include "core/hle/kernel/process.h"
14#include "core/hle/result.h" 15#include "core/hle/result.h"
15 16
@@ -62,12 +63,10 @@ public:
62 * block. 63 * block.
63 * @param name Optional object name, used for debugging purposes. 64 * @param name Optional object name, used for debugging purposes.
64 */ 65 */
65 static SharedPtr<SharedMemory> CreateForApplet(KernelCore& kernel, 66 static SharedPtr<SharedMemory> CreateForApplet(
66 std::shared_ptr<std::vector<u8>> heap_block, 67 KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset,
67 std::size_t offset, u64 size, 68 u64 size, MemoryPermission permissions, MemoryPermission other_permissions,
68 MemoryPermission permissions, 69 std::string name = "Unknown Applet");
69 MemoryPermission other_permissions,
70 std::string name = "Unknown Applet");
71 70
72 std::string GetTypeName() const override { 71 std::string GetTypeName() const override {
73 return "SharedMemory"; 72 return "SharedMemory";
@@ -135,7 +134,7 @@ private:
135 ~SharedMemory() override; 134 ~SharedMemory() override;
136 135
137 /// Backing memory for this shared memory block. 136 /// Backing memory for this shared memory block.
138 std::shared_ptr<std::vector<u8>> backing_block; 137 std::shared_ptr<PhysicalMemory> backing_block;
139 /// Offset into the backing block for this shared memory. 138 /// Offset into the backing block for this shared memory.
140 std::size_t backing_block_offset = 0; 139 std::size_t backing_block_offset = 0;
141 /// Size of the memory block. Page-aligned. 140 /// Size of the memory block. Page-aligned.
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index de6363ff2..1fd1a732a 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -98,9 +98,9 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
98 return ERR_INVALID_ADDRESS_STATE; 98 return ERR_INVALID_ADDRESS_STATE;
99 } 99 }
100 100
101 if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) { 101 if (!vm_manager.IsWithinStackRegion(dst_addr, size)) {
102 LOG_ERROR(Kernel_SVC, 102 LOG_ERROR(Kernel_SVC,
103 "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", 103 "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}",
104 dst_addr, size); 104 dst_addr, size);
105 return ERR_INVALID_MEMORY_RANGE; 105 return ERR_INVALID_MEMORY_RANGE;
106 } 106 }
@@ -318,7 +318,14 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad
318 return result; 318 return result;
319 } 319 }
320 320
321 return vm_manager.UnmapRange(dst_addr, size); 321 const auto unmap_res = vm_manager.UnmapRange(dst_addr, size);
322
323 // Reprotect the source mapping on success
324 if (unmap_res.IsSuccess()) {
325 ASSERT(vm_manager.ReprotectRange(src_addr, size, VMAPermission::ReadWrite).IsSuccess());
326 }
327
328 return unmap_res;
322} 329}
323 330
324/// Connect to an OS service given the port name, returns the handle to the port to out 331/// Connect to an OS service given the port name, returns the handle to the port to out
@@ -726,19 +733,19 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
726 // 2.0.0+ 733 // 2.0.0+
727 ASLRRegionBaseAddr = 12, 734 ASLRRegionBaseAddr = 12,
728 ASLRRegionSize = 13, 735 ASLRRegionSize = 13,
729 NewMapRegionBaseAddr = 14, 736 StackRegionBaseAddr = 14,
730 NewMapRegionSize = 15, 737 StackRegionSize = 15,
731 // 3.0.0+ 738 // 3.0.0+
732 IsVirtualAddressMemoryEnabled = 16, 739 SystemResourceSize = 16,
733 PersonalMmHeapUsage = 17, 740 SystemResourceUsage = 17,
734 TitleId = 18, 741 TitleId = 18,
735 // 4.0.0+ 742 // 4.0.0+
736 PrivilegedProcessId = 19, 743 PrivilegedProcessId = 19,
737 // 5.0.0+ 744 // 5.0.0+
738 UserExceptionContextAddr = 20, 745 UserExceptionContextAddr = 20,
739 // 6.0.0+ 746 // 6.0.0+
740 TotalPhysicalMemoryAvailableWithoutMmHeap = 21, 747 TotalPhysicalMemoryAvailableWithoutSystemResource = 21,
741 TotalPhysicalMemoryUsedWithoutMmHeap = 22, 748 TotalPhysicalMemoryUsedWithoutSystemResource = 22,
742 }; 749 };
743 750
744 const auto info_id_type = static_cast<GetInfoType>(info_id); 751 const auto info_id_type = static_cast<GetInfoType>(info_id);
@@ -752,16 +759,16 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
752 case GetInfoType::HeapRegionSize: 759 case GetInfoType::HeapRegionSize:
753 case GetInfoType::ASLRRegionBaseAddr: 760 case GetInfoType::ASLRRegionBaseAddr:
754 case GetInfoType::ASLRRegionSize: 761 case GetInfoType::ASLRRegionSize:
755 case GetInfoType::NewMapRegionBaseAddr: 762 case GetInfoType::StackRegionBaseAddr:
756 case GetInfoType::NewMapRegionSize: 763 case GetInfoType::StackRegionSize:
757 case GetInfoType::TotalPhysicalMemoryAvailable: 764 case GetInfoType::TotalPhysicalMemoryAvailable:
758 case GetInfoType::TotalPhysicalMemoryUsed: 765 case GetInfoType::TotalPhysicalMemoryUsed:
759 case GetInfoType::IsVirtualAddressMemoryEnabled: 766 case GetInfoType::SystemResourceSize:
760 case GetInfoType::PersonalMmHeapUsage: 767 case GetInfoType::SystemResourceUsage:
761 case GetInfoType::TitleId: 768 case GetInfoType::TitleId:
762 case GetInfoType::UserExceptionContextAddr: 769 case GetInfoType::UserExceptionContextAddr:
763 case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: 770 case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
764 case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: { 771 case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: {
765 if (info_sub_id != 0) { 772 if (info_sub_id != 0) {
766 return ERR_INVALID_ENUM_VALUE; 773 return ERR_INVALID_ENUM_VALUE;
767 } 774 }
@@ -806,12 +813,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
806 *result = process->VMManager().GetASLRRegionSize(); 813 *result = process->VMManager().GetASLRRegionSize();
807 return RESULT_SUCCESS; 814 return RESULT_SUCCESS;
808 815
809 case GetInfoType::NewMapRegionBaseAddr: 816 case GetInfoType::StackRegionBaseAddr:
810 *result = process->VMManager().GetNewMapRegionBaseAddress(); 817 *result = process->VMManager().GetStackRegionBaseAddress();
811 return RESULT_SUCCESS; 818 return RESULT_SUCCESS;
812 819
813 case GetInfoType::NewMapRegionSize: 820 case GetInfoType::StackRegionSize:
814 *result = process->VMManager().GetNewMapRegionSize(); 821 *result = process->VMManager().GetStackRegionSize();
815 return RESULT_SUCCESS; 822 return RESULT_SUCCESS;
816 823
817 case GetInfoType::TotalPhysicalMemoryAvailable: 824 case GetInfoType::TotalPhysicalMemoryAvailable:
@@ -822,8 +829,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
822 *result = process->GetTotalPhysicalMemoryUsed(); 829 *result = process->GetTotalPhysicalMemoryUsed();
823 return RESULT_SUCCESS; 830 return RESULT_SUCCESS;
824 831
825 case GetInfoType::IsVirtualAddressMemoryEnabled: 832 case GetInfoType::SystemResourceSize:
826 *result = process->IsVirtualMemoryEnabled(); 833 *result = process->GetSystemResourceSize();
834 return RESULT_SUCCESS;
835
836 case GetInfoType::SystemResourceUsage:
837 LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage");
838 *result = process->GetSystemResourceUsage();
827 return RESULT_SUCCESS; 839 return RESULT_SUCCESS;
828 840
829 case GetInfoType::TitleId: 841 case GetInfoType::TitleId:
@@ -831,17 +843,15 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
831 return RESULT_SUCCESS; 843 return RESULT_SUCCESS;
832 844
833 case GetInfoType::UserExceptionContextAddr: 845 case GetInfoType::UserExceptionContextAddr:
834 LOG_WARNING(Kernel_SVC, 846 *result = process->GetTLSRegionAddress();
835 "(STUBBED) Attempted to query user exception context address, returned 0");
836 *result = 0;
837 return RESULT_SUCCESS; 847 return RESULT_SUCCESS;
838 848
839 case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: 849 case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
840 *result = process->GetTotalPhysicalMemoryAvailable(); 850 *result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource();
841 return RESULT_SUCCESS; 851 return RESULT_SUCCESS;
842 852
843 case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: 853 case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource:
844 *result = process->GetTotalPhysicalMemoryUsedWithoutMmHeap(); 854 *result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource();
845 return RESULT_SUCCESS; 855 return RESULT_SUCCESS;
846 856
847 default: 857 default:
@@ -946,6 +956,86 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
946 } 956 }
947} 957}
948 958
959/// Maps memory at a desired address
960static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
961 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
962
963 if (!Common::Is4KBAligned(addr)) {
964 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
965 return ERR_INVALID_ADDRESS;
966 }
967
968 if (!Common::Is4KBAligned(size)) {
969 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
970 return ERR_INVALID_SIZE;
971 }
972
973 if (size == 0) {
974 LOG_ERROR(Kernel_SVC, "Size is zero");
975 return ERR_INVALID_SIZE;
976 }
977
978 if (!(addr < addr + size)) {
979 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
980 return ERR_INVALID_MEMORY_RANGE;
981 }
982
983 Process* const current_process = system.Kernel().CurrentProcess();
984 auto& vm_manager = current_process->VMManager();
985
986 if (current_process->GetSystemResourceSize() == 0) {
987 LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
988 return ERR_INVALID_STATE;
989 }
990
991 if (!vm_manager.IsWithinMapRegion(addr, size)) {
992 LOG_ERROR(Kernel_SVC, "Range not within map region");
993 return ERR_INVALID_MEMORY_RANGE;
994 }
995
996 return vm_manager.MapPhysicalMemory(addr, size);
997}
998
999/// Unmaps memory previously mapped via MapPhysicalMemory
1000static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
1001 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
1002
1003 if (!Common::Is4KBAligned(addr)) {
1004 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
1005 return ERR_INVALID_ADDRESS;
1006 }
1007
1008 if (!Common::Is4KBAligned(size)) {
1009 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
1010 return ERR_INVALID_SIZE;
1011 }
1012
1013 if (size == 0) {
1014 LOG_ERROR(Kernel_SVC, "Size is zero");
1015 return ERR_INVALID_SIZE;
1016 }
1017
1018 if (!(addr < addr + size)) {
1019 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
1020 return ERR_INVALID_MEMORY_RANGE;
1021 }
1022
1023 Process* const current_process = system.Kernel().CurrentProcess();
1024 auto& vm_manager = current_process->VMManager();
1025
1026 if (current_process->GetSystemResourceSize() == 0) {
1027 LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
1028 return ERR_INVALID_STATE;
1029 }
1030
1031 if (!vm_manager.IsWithinMapRegion(addr, size)) {
1032 LOG_ERROR(Kernel_SVC, "Range not within map region");
1033 return ERR_INVALID_MEMORY_RANGE;
1034 }
1035
1036 return vm_manager.UnmapPhysicalMemory(addr, size);
1037}
1038
949/// Sets the thread activity 1039/// Sets the thread activity
950static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { 1040static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
951 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); 1041 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
@@ -1647,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
1647// Wait for an address (via Address Arbiter) 1737// Wait for an address (via Address Arbiter)
1648static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, 1738static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
1649 s64 timeout) { 1739 s64 timeout) {
1650 LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", 1740 LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
1651 address, type, value, timeout); 1741 type, value, timeout);
1652 1742
1653 // If the passed address is a kernel virtual address, return invalid memory state. 1743 // If the passed address is a kernel virtual address, return invalid memory state.
1654 if (Memory::IsKernelVirtualAddress(address)) { 1744 if (Memory::IsKernelVirtualAddress(address)) {
@@ -1670,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
1670// Signals to an address (via Address Arbiter) 1760// Signals to an address (via Address Arbiter)
1671static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value, 1761static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
1672 s32 num_to_wake) { 1762 s32 num_to_wake) {
1673 LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", 1763 LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
1674 address, type, value, num_to_wake); 1764 address, type, value, num_to_wake);
1675 1765
1676 // If the passed address is a kernel virtual address, return invalid memory state. 1766 // If the passed address is a kernel virtual address, return invalid memory state.
1677 if (Memory::IsKernelVirtualAddress(address)) { 1767 if (Memory::IsKernelVirtualAddress(address)) {
@@ -2303,8 +2393,8 @@ static const FunctionDef SVC_Table[] = {
2303 {0x29, SvcWrap<GetInfo>, "GetInfo"}, 2393 {0x29, SvcWrap<GetInfo>, "GetInfo"},
2304 {0x2A, nullptr, "FlushEntireDataCache"}, 2394 {0x2A, nullptr, "FlushEntireDataCache"},
2305 {0x2B, nullptr, "FlushDataCache"}, 2395 {0x2B, nullptr, "FlushDataCache"},
2306 {0x2C, nullptr, "MapPhysicalMemory"}, 2396 {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"},
2307 {0x2D, nullptr, "UnmapPhysicalMemory"}, 2397 {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"},
2308 {0x2E, nullptr, "GetFutureThreadInfo"}, 2398 {0x2E, nullptr, "GetFutureThreadInfo"},
2309 {0x2F, nullptr, "GetLastThreadInfo"}, 2399 {0x2F, nullptr, "GetLastThreadInfo"},
2310 {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, 2400 {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"},
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 865473c6f..c2d8d0dc3 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -32,6 +32,11 @@ void SvcWrap(Core::System& system) {
32 FuncReturn(system, func(system, Param(system, 0)).raw); 32 FuncReturn(system, func(system, Param(system, 0)).raw);
33} 33}
34 34
35template <ResultCode func(Core::System&, u64, u64)>
36void SvcWrap(Core::System& system) {
37 FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw);
38}
39
35template <ResultCode func(Core::System&, u32)> 40template <ResultCode func(Core::System&, u32)>
36void SvcWrap(Core::System& system) { 41void SvcWrap(Core::System& system) {
37 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); 42 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index a055a5002..ec529e7f2 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -65,7 +65,7 @@ void Thread::Stop() {
65 owner_process->UnregisterThread(this); 65 owner_process->UnregisterThread(this);
66 66
67 // Mark the TLS slot in the thread's page as free. 67 // Mark the TLS slot in the thread's page as free.
68 owner_process->FreeTLSSlot(tls_address); 68 owner_process->FreeTLSRegion(tls_address);
69} 69}
70 70
71void Thread::WakeAfterDelay(s64 nanoseconds) { 71void Thread::WakeAfterDelay(s64 nanoseconds) {
@@ -205,9 +205,9 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
205 thread->name = std::move(name); 205 thread->name = std::move(name);
206 thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap(); 206 thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
207 thread->owner_process = &owner_process; 207 thread->owner_process = &owner_process;
208 thread->tls_address = thread->owner_process->CreateTLSRegion();
208 thread->scheduler = &system.Scheduler(processor_id); 209 thread->scheduler = &system.Scheduler(processor_id);
209 thread->scheduler->AddThread(thread); 210 thread->scheduler->AddThread(thread);
210 thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
211 211
212 thread->owner_process->RegisterThread(thread.get()); 212 thread->owner_process->RegisterThread(thread.get());
213 213
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index b4b9cda7c..07e989637 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include <functional> 7#include <functional>
8#include <memory>
9#include <string> 8#include <string>
10#include <vector> 9#include <vector>
11 10
@@ -78,9 +77,6 @@ enum class ThreadActivity : u32 {
78 77
79class Thread final : public WaitObject { 78class Thread final : public WaitObject {
80public: 79public:
81 using TLSMemory = std::vector<u8>;
82 using TLSMemoryPtr = std::shared_ptr<TLSMemory>;
83
84 using MutexWaitingThreads = std::vector<SharedPtr<Thread>>; 80 using MutexWaitingThreads = std::vector<SharedPtr<Thread>>;
85 81
86 using ThreadContext = Core::ARM_Interface::ThreadContext; 82 using ThreadContext = Core::ARM_Interface::ThreadContext;
@@ -169,14 +165,6 @@ public:
169 return thread_id; 165 return thread_id;
170 } 166 }
171 167
172 TLSMemoryPtr& GetTLSMemory() {
173 return tls_memory;
174 }
175
176 const TLSMemoryPtr& GetTLSMemory() const {
177 return tls_memory;
178 }
179
180 /// Resumes a thread from waiting 168 /// Resumes a thread from waiting
181 void ResumeFromWait(); 169 void ResumeFromWait();
182 170
@@ -463,11 +451,9 @@ private:
463 u32 ideal_core{0xFFFFFFFF}; 451 u32 ideal_core{0xFFFFFFFF};
464 u64 affinity_mask{0x1}; 452 u64 affinity_mask{0x1};
465 453
466 TLSMemoryPtr tls_memory = std::make_shared<TLSMemory>(); 454 ThreadActivity activity = ThreadActivity::Normal;
467 455
468 std::string name; 456 std::string name;
469
470 ThreadActivity activity = ThreadActivity::Normal;
471}; 457};
472 458
473/** 459/**
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
index 26c4e5e67..1113c815e 100644
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -47,7 +47,7 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p
47 return ERR_INVALID_STATE; 47 return ERR_INVALID_STATE;
48 } 48 }
49 49
50 backing_block = std::make_shared<std::vector<u8>>(size); 50 backing_block = std::make_shared<PhysicalMemory>(size);
51 51
52 const auto map_state = owner_permissions == MemoryPermission::None 52 const auto map_state = owner_permissions == MemoryPermission::None
53 ? MemoryState::TransferMemoryIsolated 53 ? MemoryState::TransferMemoryIsolated
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
index a140b1e2b..6be9dc094 100644
--- a/src/core/hle/kernel/transfer_memory.h
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -8,6 +8,7 @@
8#include <vector> 8#include <vector>
9 9
10#include "core/hle/kernel/object.h" 10#include "core/hle/kernel/object.h"
11#include "core/hle/kernel/physical_memory.h"
11 12
12union ResultCode; 13union ResultCode;
13 14
@@ -82,7 +83,7 @@ private:
82 ~TransferMemory() override; 83 ~TransferMemory() override;
83 84
84 /// Memory block backing this instance. 85 /// Memory block backing this instance.
85 std::shared_ptr<std::vector<u8>> backing_block; 86 std::shared_ptr<PhysicalMemory> backing_block;
86 87
87 /// The base address for the memory managed by this instance. 88 /// The base address for the memory managed by this instance.
88 VAddr base_address = 0; 89 VAddr base_address = 0;
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index c929c2a52..c7af87073 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -5,13 +5,15 @@
5#include <algorithm> 5#include <algorithm>
6#include <iterator> 6#include <iterator>
7#include <utility> 7#include <utility>
8#include "common/alignment.h"
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/logging/log.h" 10#include "common/logging/log.h"
10#include "common/memory_hook.h" 11#include "common/memory_hook.h"
11#include "core/arm/arm_interface.h"
12#include "core/core.h" 12#include "core/core.h"
13#include "core/file_sys/program_metadata.h" 13#include "core/file_sys/program_metadata.h"
14#include "core/hle/kernel/errors.h" 14#include "core/hle/kernel/errors.h"
15#include "core/hle/kernel/process.h"
16#include "core/hle/kernel/resource_limit.h"
15#include "core/hle/kernel/vm_manager.h" 17#include "core/hle/kernel/vm_manager.h"
16#include "core/memory.h" 18#include "core/memory.h"
17#include "core/memory_setup.h" 19#include "core/memory_setup.h"
@@ -49,10 +51,14 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
49 type != next.type) { 51 type != next.type) {
50 return false; 52 return false;
51 } 53 }
52 if (type == VMAType::AllocatedMemoryBlock && 54 if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) {
53 (backing_block != next.backing_block || offset + size != next.offset)) { 55 // TODO: Can device mapped memory be merged sanely?
56 // Not merging it may cause inaccuracies versus hardware when memory layout is queried.
54 return false; 57 return false;
55 } 58 }
59 if (type == VMAType::AllocatedMemoryBlock) {
60 return true;
61 }
56 if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) { 62 if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) {
57 return false; 63 return false;
58 } 64 }
@@ -98,9 +104,9 @@ bool VMManager::IsValidHandle(VMAHandle handle) const {
98} 104}
99 105
100ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, 106ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
101 std::shared_ptr<std::vector<u8>> block, 107 std::shared_ptr<PhysicalMemory> block,
102 std::size_t offset, u64 size, 108 std::size_t offset, u64 size,
103 MemoryState state) { 109 MemoryState state, VMAPermission perm) {
104 ASSERT(block != nullptr); 110 ASSERT(block != nullptr);
105 ASSERT(offset + size <= block->size()); 111 ASSERT(offset + size <= block->size());
106 112
@@ -109,17 +115,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
109 VirtualMemoryArea& final_vma = vma_handle->second; 115 VirtualMemoryArea& final_vma = vma_handle->second;
110 ASSERT(final_vma.size == size); 116 ASSERT(final_vma.size == size);
111 117
112 system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
113 VMAPermission::ReadWriteExecute);
114 system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
115 VMAPermission::ReadWriteExecute);
116 system.ArmInterface(2).MapBackingMemory(target, size, block->data() + offset,
117 VMAPermission::ReadWriteExecute);
118 system.ArmInterface(3).MapBackingMemory(target, size, block->data() + offset,
119 VMAPermission::ReadWriteExecute);
120
121 final_vma.type = VMAType::AllocatedMemoryBlock; 118 final_vma.type = VMAType::AllocatedMemoryBlock;
122 final_vma.permissions = VMAPermission::ReadWrite; 119 final_vma.permissions = perm;
123 final_vma.state = state; 120 final_vma.state = state;
124 final_vma.backing_block = std::move(block); 121 final_vma.backing_block = std::move(block);
125 final_vma.offset = offset; 122 final_vma.offset = offset;
@@ -137,11 +134,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
137 VirtualMemoryArea& final_vma = vma_handle->second; 134 VirtualMemoryArea& final_vma = vma_handle->second;
138 ASSERT(final_vma.size == size); 135 ASSERT(final_vma.size == size);
139 136
140 system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
141 system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
142 system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
143 system.ArmInterface(3).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
144
145 final_vma.type = VMAType::BackingMemory; 137 final_vma.type = VMAType::BackingMemory;
146 final_vma.permissions = VMAPermission::ReadWrite; 138 final_vma.permissions = VMAPermission::ReadWrite;
147 final_vma.state = state; 139 final_vma.state = state;
@@ -152,22 +144,33 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
152} 144}
153 145
154ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const { 146ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
155 // Find the first Free VMA. 147 return FindFreeRegion(GetASLRRegionBaseAddress(), GetASLRRegionEndAddress(), size);
156 const VAddr base = GetASLRRegionBaseAddress(); 148}
157 const VMAHandle vma_handle = std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) {
158 if (vma.second.type != VMAType::Free)
159 return false;
160 149
161 const VAddr vma_end = vma.second.base + vma.second.size; 150ResultVal<VAddr> VMManager::FindFreeRegion(VAddr begin, VAddr end, u64 size) const {
162 return vma_end > base && vma_end >= base + size; 151 ASSERT(begin < end);
163 }); 152 ASSERT(size <= end - begin);
164 153
165 if (vma_handle == vma_map.end()) { 154 const VMAHandle vma_handle =
155 std::find_if(vma_map.begin(), vma_map.end(), [begin, end, size](const auto& vma) {
156 if (vma.second.type != VMAType::Free) {
157 return false;
158 }
159 const VAddr vma_base = vma.second.base;
160 const VAddr vma_end = vma_base + vma.second.size;
161 const VAddr assumed_base = (begin < vma_base) ? vma_base : begin;
162 const VAddr used_range = assumed_base + size;
163
164 return vma_base <= assumed_base && assumed_base < used_range && used_range < end &&
165 used_range <= vma_end;
166 });
167
168 if (vma_handle == vma_map.cend()) {
166 // TODO(Subv): Find the correct error code here. 169 // TODO(Subv): Find the correct error code here.
167 return ResultCode(-1); 170 return ResultCode(-1);
168 } 171 }
169 172
170 const VAddr target = std::max(base, vma_handle->second.base); 173 const VAddr target = std::max(begin, vma_handle->second.base);
171 return MakeResult<VAddr>(target); 174 return MakeResult<VAddr>(target);
172} 175}
173 176
@@ -219,11 +222,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
219 222
220 ASSERT(FindVMA(target)->second.size >= size); 223 ASSERT(FindVMA(target)->second.size >= size);
221 224
222 system.ArmInterface(0).UnmapMemory(target, size);
223 system.ArmInterface(1).UnmapMemory(target, size);
224 system.ArmInterface(2).UnmapMemory(target, size);
225 system.ArmInterface(3).UnmapMemory(target, size);
226
227 return RESULT_SUCCESS; 225 return RESULT_SUCCESS;
228} 226}
229 227
@@ -263,7 +261,7 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
263 261
264 if (heap_memory == nullptr) { 262 if (heap_memory == nullptr) {
265 // Initialize heap 263 // Initialize heap
266 heap_memory = std::make_shared<std::vector<u8>>(size); 264 heap_memory = std::make_shared<PhysicalMemory>(size);
267 heap_end = heap_region_base + size; 265 heap_end = heap_region_base + size;
268 } else { 266 } else {
269 UnmapRange(heap_region_base, GetCurrentHeapSize()); 267 UnmapRange(heap_region_base, GetCurrentHeapSize());
@@ -297,6 +295,162 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
297 return MakeResult<VAddr>(heap_region_base); 295 return MakeResult<VAddr>(heap_region_base);
298} 296}
299 297
298ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
299 // Check how much memory we've already mapped.
300 const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size);
301 if (mapped_size_result.Failed()) {
302 return mapped_size_result.Code();
303 }
304
305 // If we've already mapped the desired amount, return early.
306 const std::size_t mapped_size = *mapped_size_result;
307 if (mapped_size == size) {
308 return RESULT_SUCCESS;
309 }
310
311 // Check that we can map the memory we want.
312 const auto res_limit = system.CurrentProcess()->GetResourceLimit();
313 const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) -
314 res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory);
315 if (physmem_remaining < (size - mapped_size)) {
316 return ERR_RESOURCE_LIMIT_EXCEEDED;
317 }
318
319 // Keep track of the memory regions we unmap.
320 std::vector<std::pair<u64, u64>> mapped_regions;
321 ResultCode result = RESULT_SUCCESS;
322
323 // Iterate, trying to map memory.
324 {
325 const auto end_addr = target + size;
326 const auto last_addr = end_addr - 1;
327 VAddr cur_addr = target;
328
329 auto iter = FindVMA(target);
330 ASSERT(iter != vma_map.end());
331
332 while (true) {
333 const auto& vma = iter->second;
334 const auto vma_start = vma.base;
335 const auto vma_end = vma_start + vma.size;
336 const auto vma_last = vma_end - 1;
337
338 // Map the memory block
339 const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
340 if (vma.state == MemoryState::Unmapped) {
341 const auto map_res =
342 MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size), 0,
343 map_size, MemoryState::Heap, VMAPermission::ReadWrite);
344 result = map_res.Code();
345 if (result.IsError()) {
346 break;
347 }
348
349 mapped_regions.emplace_back(cur_addr, map_size);
350 }
351
352 // Break once we hit the end of the range.
353 if (last_addr <= vma_last) {
354 break;
355 }
356
357 // Advance to the next block.
358 cur_addr = vma_end;
359 iter = FindVMA(cur_addr);
360 ASSERT(iter != vma_map.end());
361 }
362 }
363
364 // If we failed, unmap memory.
365 if (result.IsError()) {
366 for (const auto [unmap_address, unmap_size] : mapped_regions) {
367 ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(),
368 "Failed to unmap memory range.");
369 }
370
371 return result;
372 }
373
374 // Update amount of mapped physical memory.
375 physical_memory_mapped += size - mapped_size;
376
377 return RESULT_SUCCESS;
378}
379
380ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
381 // Check how much memory is currently mapped.
382 const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size);
383 if (mapped_size_result.Failed()) {
384 return mapped_size_result.Code();
385 }
386
387 // If we've already unmapped all the memory, return early.
388 const std::size_t mapped_size = *mapped_size_result;
389 if (mapped_size == 0) {
390 return RESULT_SUCCESS;
391 }
392
393 // Keep track of the memory regions we unmap.
394 std::vector<std::pair<u64, u64>> unmapped_regions;
395 ResultCode result = RESULT_SUCCESS;
396
397 // Try to unmap regions.
398 {
399 const auto end_addr = target + size;
400 const auto last_addr = end_addr - 1;
401 VAddr cur_addr = target;
402
403 auto iter = FindVMA(target);
404 ASSERT(iter != vma_map.end());
405
406 while (true) {
407 const auto& vma = iter->second;
408 const auto vma_start = vma.base;
409 const auto vma_end = vma_start + vma.size;
410 const auto vma_last = vma_end - 1;
411
412 // Unmap the memory block
413 const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
414 if (vma.state == MemoryState::Heap) {
415 result = UnmapRange(cur_addr, unmap_size);
416 if (result.IsError()) {
417 break;
418 }
419
420 unmapped_regions.emplace_back(cur_addr, unmap_size);
421 }
422
423 // Break once we hit the end of the range.
424 if (last_addr <= vma_last) {
425 break;
426 }
427
428 // Advance to the next block.
429 cur_addr = vma_end;
430 iter = FindVMA(cur_addr);
431 ASSERT(iter != vma_map.end());
432 }
433 }
434
435 // If we failed, re-map regions.
436 // TODO: Preserve memory contents?
437 if (result.IsError()) {
438 for (const auto [map_address, map_size] : unmapped_regions) {
439 const auto remap_res =
440 MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size), 0, map_size,
441 MemoryState::Heap, VMAPermission::None);
442 ASSERT_MSG(remap_res.Succeeded(), "Failed to remap a memory block.");
443 }
444
445 return result;
446 }
447
448 // Update mapped amount
449 physical_memory_mapped -= mapped_size;
450
451 return RESULT_SUCCESS;
452}
453
300ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { 454ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) {
301 constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; 455 constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped;
302 const auto src_check_result = CheckRangeState( 456 const auto src_check_result = CheckRangeState(
@@ -436,7 +590,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
436 ASSERT_MSG(vma_offset + size <= vma->second.size, 590 ASSERT_MSG(vma_offset + size <= vma->second.size,
437 "Shared memory exceeds bounds of mapped block"); 591 "Shared memory exceeds bounds of mapped block");
438 592
439 const std::shared_ptr<std::vector<u8>>& backing_block = vma->second.backing_block; 593 const std::shared_ptr<PhysicalMemory>& backing_block = vma->second.backing_block;
440 const std::size_t backing_block_offset = vma->second.offset + vma_offset; 594 const std::size_t backing_block_offset = vma->second.offset + vma_offset;
441 595
442 CASCADE_RESULT(auto new_vma, 596 CASCADE_RESULT(auto new_vma,
@@ -444,12 +598,12 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
444 // Protect mirror with permissions from old region 598 // Protect mirror with permissions from old region
445 Reprotect(new_vma, vma->second.permissions); 599 Reprotect(new_vma, vma->second.permissions);
446 // Remove permissions from old region 600 // Remove permissions from old region
447 Reprotect(vma, VMAPermission::None); 601 ReprotectRange(src_addr, size, VMAPermission::None);
448 602
449 return RESULT_SUCCESS; 603 return RESULT_SUCCESS;
450} 604}
451 605
452void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) { 606void VMManager::RefreshMemoryBlockMappings(const PhysicalMemory* block) {
453 // If this ever proves to have a noticeable performance impact, allow users of the function to 607 // If this ever proves to have a noticeable performance impact, allow users of the function to
454 // specify a specific range of addresses to limit the scan to. 608 // specify a specific range of addresses to limit the scan to.
455 for (const auto& p : vma_map) { 609 for (const auto& p : vma_map) {
@@ -577,14 +731,14 @@ VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
577VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { 731VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
578 const VMAIter next_vma = std::next(iter); 732 const VMAIter next_vma = std::next(iter);
579 if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { 733 if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
580 iter->second.size += next_vma->second.size; 734 MergeAdjacentVMA(iter->second, next_vma->second);
581 vma_map.erase(next_vma); 735 vma_map.erase(next_vma);
582 } 736 }
583 737
584 if (iter != vma_map.begin()) { 738 if (iter != vma_map.begin()) {
585 VMAIter prev_vma = std::prev(iter); 739 VMAIter prev_vma = std::prev(iter);
586 if (prev_vma->second.CanBeMergedWith(iter->second)) { 740 if (prev_vma->second.CanBeMergedWith(iter->second)) {
587 prev_vma->second.size += iter->second.size; 741 MergeAdjacentVMA(prev_vma->second, iter->second);
588 vma_map.erase(iter); 742 vma_map.erase(iter);
589 iter = prev_vma; 743 iter = prev_vma;
590 } 744 }
@@ -593,6 +747,44 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
593 return iter; 747 return iter;
594} 748}
595 749
750void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) {
751 ASSERT(left.CanBeMergedWith(right));
752
753 // Always merge allocated memory blocks, even when they don't share the same backing block.
754 if (left.type == VMAType::AllocatedMemoryBlock &&
755 (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
756 const auto right_begin = right.backing_block->begin() + right.offset;
757 const auto right_end = right_begin + right.size;
758
759 // Check if we can save work.
760 if (left.offset == 0 && left.size == left.backing_block->size()) {
761 // Fast case: left is an entire backing block.
762 left.backing_block->insert(left.backing_block->end(), right_begin, right_end);
763 } else {
764 // Slow case: make a new memory block for left and right.
765 const auto left_begin = left.backing_block->begin() + left.offset;
766 const auto left_end = left_begin + left.size;
767 const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end));
768 const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end));
769
770 auto new_memory = std::make_shared<PhysicalMemory>();
771 new_memory->reserve(left_size + right_size);
772 new_memory->insert(new_memory->end(), left_begin, left_end);
773 new_memory->insert(new_memory->end(), right_begin, right_end);
774
775 left.backing_block = std::move(new_memory);
776 left.offset = 0;
777 }
778
779 // Page table update is needed, because backing memory changed.
780 left.size += right.size;
781 UpdatePageTableForVMA(left);
782 } else {
783 // Just update the size.
784 left.size += right.size;
785 }
786}
787
596void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { 788void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
597 switch (vma.type) { 789 switch (vma.type) {
598 case VMAType::Free: 790 case VMAType::Free:
@@ -614,9 +806,11 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
614void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType type) { 806void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType type) {
615 u64 map_region_size = 0; 807 u64 map_region_size = 0;
616 u64 heap_region_size = 0; 808 u64 heap_region_size = 0;
617 u64 new_map_region_size = 0; 809 u64 stack_region_size = 0;
618 u64 tls_io_region_size = 0; 810 u64 tls_io_region_size = 0;
619 811
812 u64 stack_and_tls_io_end = 0;
813
620 switch (type) { 814 switch (type) {
621 case FileSys::ProgramAddressSpaceType::Is32Bit: 815 case FileSys::ProgramAddressSpaceType::Is32Bit:
622 case FileSys::ProgramAddressSpaceType::Is32BitNoMap: 816 case FileSys::ProgramAddressSpaceType::Is32BitNoMap:
@@ -632,6 +826,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty
632 map_region_size = 0; 826 map_region_size = 0;
633 heap_region_size = 0x80000000; 827 heap_region_size = 0x80000000;
634 } 828 }
829 stack_and_tls_io_end = 0x40000000;
635 break; 830 break;
636 case FileSys::ProgramAddressSpaceType::Is36Bit: 831 case FileSys::ProgramAddressSpaceType::Is36Bit:
637 address_space_width = 36; 832 address_space_width = 36;
@@ -641,6 +836,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty
641 aslr_region_end = aslr_region_base + 0xFF8000000; 836 aslr_region_end = aslr_region_base + 0xFF8000000;
642 map_region_size = 0x180000000; 837 map_region_size = 0x180000000;
643 heap_region_size = 0x180000000; 838 heap_region_size = 0x180000000;
839 stack_and_tls_io_end = 0x80000000;
644 break; 840 break;
645 case FileSys::ProgramAddressSpaceType::Is39Bit: 841 case FileSys::ProgramAddressSpaceType::Is39Bit:
646 address_space_width = 39; 842 address_space_width = 39;
@@ -650,7 +846,7 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty
650 aslr_region_end = aslr_region_base + 0x7FF8000000; 846 aslr_region_end = aslr_region_base + 0x7FF8000000;
651 map_region_size = 0x1000000000; 847 map_region_size = 0x1000000000;
652 heap_region_size = 0x180000000; 848 heap_region_size = 0x180000000;
653 new_map_region_size = 0x80000000; 849 stack_region_size = 0x80000000;
654 tls_io_region_size = 0x1000000000; 850 tls_io_region_size = 0x1000000000;
655 break; 851 break;
656 default: 852 default:
@@ -658,6 +854,8 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty
658 return; 854 return;
659 } 855 }
660 856
857 const u64 stack_and_tls_io_begin = aslr_region_base;
858
661 address_space_base = 0; 859 address_space_base = 0;
662 address_space_end = 1ULL << address_space_width; 860 address_space_end = 1ULL << address_space_width;
663 861
@@ -668,15 +866,20 @@ void VMManager::InitializeMemoryRegionRanges(FileSys::ProgramAddressSpaceType ty
668 heap_region_end = heap_region_base + heap_region_size; 866 heap_region_end = heap_region_base + heap_region_size;
669 heap_end = heap_region_base; 867 heap_end = heap_region_base;
670 868
671 new_map_region_base = heap_region_end; 869 stack_region_base = heap_region_end;
672 new_map_region_end = new_map_region_base + new_map_region_size; 870 stack_region_end = stack_region_base + stack_region_size;
673 871
674 tls_io_region_base = new_map_region_end; 872 tls_io_region_base = stack_region_end;
675 tls_io_region_end = tls_io_region_base + tls_io_region_size; 873 tls_io_region_end = tls_io_region_base + tls_io_region_size;
676 874
677 if (new_map_region_size == 0) { 875 if (stack_region_size == 0) {
678 new_map_region_base = address_space_base; 876 stack_region_base = stack_and_tls_io_begin;
679 new_map_region_end = address_space_end; 877 stack_region_end = stack_and_tls_io_end;
878 }
879
880 if (tls_io_region_size == 0) {
881 tls_io_region_base = stack_and_tls_io_begin;
882 tls_io_region_end = stack_and_tls_io_end;
680 } 883 }
681} 884}
682 885
@@ -756,6 +959,84 @@ VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, Memo
756 std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); 959 std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask));
757} 960}
758 961
962ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address,
963 std::size_t size) const {
964 const VAddr end_addr = address + size;
965 const VAddr last_addr = end_addr - 1;
966 std::size_t mapped_size = 0;
967
968 VAddr cur_addr = address;
969 auto iter = FindVMA(cur_addr);
970 ASSERT(iter != vma_map.end());
971
972 while (true) {
973 const auto& vma = iter->second;
974 const VAddr vma_start = vma.base;
975 const VAddr vma_end = vma_start + vma.size;
976 const VAddr vma_last = vma_end - 1;
977
978 // Add size if relevant.
979 if (vma.state != MemoryState::Unmapped) {
980 mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
981 }
982
983 // Break once we hit the end of the range.
984 if (last_addr <= vma_last) {
985 break;
986 }
987
988 // Advance to the next block.
989 cur_addr = vma_end;
990 iter = std::next(iter);
991 ASSERT(iter != vma_map.end());
992 }
993
994 return MakeResult(mapped_size);
995}
996
997ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
998 std::size_t size) const {
999 const VAddr end_addr = address + size;
1000 const VAddr last_addr = end_addr - 1;
1001 std::size_t mapped_size = 0;
1002
1003 VAddr cur_addr = address;
1004 auto iter = FindVMA(cur_addr);
1005 ASSERT(iter != vma_map.end());
1006
1007 while (true) {
1008 const auto& vma = iter->second;
1009 const auto vma_start = vma.base;
1010 const auto vma_end = vma_start + vma.size;
1011 const auto vma_last = vma_end - 1;
1012 const auto state = vma.state;
1013 const auto attr = vma.attribute;
1014
1015 // Memory within region must be free or mapped heap.
1016 if (!((state == MemoryState::Heap && attr == MemoryAttribute::None) ||
1017 (state == MemoryState::Unmapped))) {
1018 return ERR_INVALID_ADDRESS_STATE;
1019 }
1020
1021 // Add size if relevant.
1022 if (state != MemoryState::Unmapped) {
1023 mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
1024 }
1025
1026 // Break once we hit the end of the range.
1027 if (last_addr <= vma_last) {
1028 break;
1029 }
1030
1031 // Advance to the next block.
1032 cur_addr = vma_end;
1033 iter = std::next(iter);
1034 ASSERT(iter != vma_map.end());
1035 }
1036
1037 return MakeResult(mapped_size);
1038}
1039
759u64 VMManager::GetTotalPhysicalMemoryAvailable() const { 1040u64 VMManager::GetTotalPhysicalMemoryAvailable() const {
760 LOG_WARNING(Kernel, "(STUBBED) called"); 1041 LOG_WARNING(Kernel, "(STUBBED) called");
761 return 0xF8000000; 1042 return 0xF8000000;
@@ -868,21 +1149,21 @@ bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const {
868 return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress()); 1149 return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
869} 1150}
870 1151
871VAddr VMManager::GetNewMapRegionBaseAddress() const { 1152VAddr VMManager::GetStackRegionBaseAddress() const {
872 return new_map_region_base; 1153 return stack_region_base;
873} 1154}
874 1155
875VAddr VMManager::GetNewMapRegionEndAddress() const { 1156VAddr VMManager::GetStackRegionEndAddress() const {
876 return new_map_region_end; 1157 return stack_region_end;
877} 1158}
878 1159
879u64 VMManager::GetNewMapRegionSize() const { 1160u64 VMManager::GetStackRegionSize() const {
880 return new_map_region_end - new_map_region_base; 1161 return stack_region_end - stack_region_base;
881} 1162}
882 1163
883bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const { 1164bool VMManager::IsWithinStackRegion(VAddr address, u64 size) const {
884 return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(), 1165 return IsInsideAddressRange(address, size, GetStackRegionBaseAddress(),
885 GetNewMapRegionEndAddress()); 1166 GetStackRegionEndAddress());
886} 1167}
887 1168
888VAddr VMManager::GetTLSIORegionBaseAddress() const { 1169VAddr VMManager::GetTLSIORegionBaseAddress() const {
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index dfbf7a894..850a7ebc3 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -11,6 +11,7 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/memory_hook.h" 12#include "common/memory_hook.h"
13#include "common/page_table.h" 13#include "common/page_table.h"
14#include "core/hle/kernel/physical_memory.h"
14#include "core/hle/result.h" 15#include "core/hle/result.h"
15#include "core/memory.h" 16#include "core/memory.h"
16 17
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
290 291
291 // Settings for type = AllocatedMemoryBlock 292 // Settings for type = AllocatedMemoryBlock
292 /// Memory block backing this VMA. 293 /// Memory block backing this VMA.
293 std::shared_ptr<std::vector<u8>> backing_block = nullptr; 294 std::shared_ptr<PhysicalMemory> backing_block = nullptr;
294 /// Offset into the backing_memory the mapping starts from. 295 /// Offset into the backing_memory the mapping starts from.
295 std::size_t offset = 0; 296 std::size_t offset = 0;
296 297
@@ -348,8 +349,9 @@ public:
348 * @param size Size of the mapping. 349 * @param size Size of the mapping.
349 * @param state MemoryState tag to attach to the VMA. 350 * @param state MemoryState tag to attach to the VMA.
350 */ 351 */
351 ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, 352 ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<PhysicalMemory> block,
352 std::size_t offset, u64 size, MemoryState state); 353 std::size_t offset, u64 size, MemoryState state,
354 VMAPermission perm = VMAPermission::ReadWrite);
353 355
354 /** 356 /**
355 * Maps an unmanaged host memory pointer at a given address. 357 * Maps an unmanaged host memory pointer at a given address.
@@ -362,14 +364,39 @@ public:
362 ResultVal<VMAHandle> MapBackingMemory(VAddr target, u8* memory, u64 size, MemoryState state); 364 ResultVal<VMAHandle> MapBackingMemory(VAddr target, u8* memory, u64 size, MemoryState state);
363 365
364 /** 366 /**
365 * Finds the first free address that can hold a region of the desired size. 367 * Finds the first free memory region of the given size within
368 * the user-addressable ASLR memory region.
366 * 369 *
367 * @param size Size of the desired region. 370 * @param size The size of the desired region in bytes.
368 * @return The found free address. 371 *
372 * @returns If successful, the base address of the free region with
373 * the given size.
369 */ 374 */
370 ResultVal<VAddr> FindFreeRegion(u64 size) const; 375 ResultVal<VAddr> FindFreeRegion(u64 size) const;
371 376
372 /** 377 /**
378 * Finds the first free address range that can hold a region of the desired size
379 *
380 * @param begin The starting address of the range.
381 * This is treated as an inclusive beginning address.
382 *
383 * @param end The ending address of the range.
384 * This is treated as an exclusive ending address.
385 *
386 * @param size The size of the free region to attempt to locate,
387 * in bytes.
388 *
389 * @returns If successful, the base address of the free region with
390 * the given size.
391 *
392 * @returns If unsuccessful, a result containing an error code.
393 *
394 * @pre The starting address must be less than the ending address.
395 * @pre The size must not exceed the address range itself.
396 */
397 ResultVal<VAddr> FindFreeRegion(VAddr begin, VAddr end, u64 size) const;
398
399 /**
373 * Maps a memory-mapped IO region at a given address. 400 * Maps a memory-mapped IO region at a given address.
374 * 401 *
375 * @param target The guest address to start the mapping at. 402 * @param target The guest address to start the mapping at.
@@ -425,6 +452,34 @@ public:
425 /// 452 ///
426 ResultVal<VAddr> SetHeapSize(u64 size); 453 ResultVal<VAddr> SetHeapSize(u64 size);
427 454
455 /// Maps memory at a given address.
456 ///
457 /// @param target The virtual address to map memory at.
458 /// @param size The amount of memory to map.
459 ///
460 /// @note The destination address must lie within the Map region.
461 ///
462 /// @note This function requires that SystemResourceSize be non-zero,
463 /// however, this is just because if it were not then the
464 /// resulting page tables could be exploited on hardware by
465 /// a malicious program. SystemResource usage does not need
466 /// to be explicitly checked or updated here.
467 ResultCode MapPhysicalMemory(VAddr target, u64 size);
468
469 /// Unmaps memory at a given address.
470 ///
471 /// @param target The virtual address to unmap memory at.
472 /// @param size The amount of memory to unmap.
473 ///
474 /// @note The destination address must lie within the Map region.
475 ///
476 /// @note This function requires that SystemResourceSize be non-zero,
477 /// however, this is just because if it were not then the
478 /// resulting page tables could be exploited on hardware by
479 /// a malicious program. SystemResource usage does not need
480 /// to be explicitly checked or updated here.
481 ResultCode UnmapPhysicalMemory(VAddr target, u64 size);
482
428 /// Maps a region of memory as code memory. 483 /// Maps a region of memory as code memory.
429 /// 484 ///
430 /// @param dst_address The base address of the region to create the aliasing memory region. 485 /// @param dst_address The base address of the region to create the aliasing memory region.
@@ -493,7 +548,7 @@ public:
493 * Scans all VMAs and updates the page table range of any that use the given vector as backing 548 * Scans all VMAs and updates the page table range of any that use the given vector as backing
494 * memory. This should be called after any operation that causes reallocation of the vector. 549 * memory. This should be called after any operation that causes reallocation of the vector.
495 */ 550 */
496 void RefreshMemoryBlockMappings(const std::vector<u8>* block); 551 void RefreshMemoryBlockMappings(const PhysicalMemory* block);
497 552
498 /// Dumps the address space layout to the log, for debugging 553 /// Dumps the address space layout to the log, for debugging
499 void LogLayout() const; 554 void LogLayout() const;
@@ -571,17 +626,17 @@ public:
571 /// Determines whether or not the specified range is within the map region. 626 /// Determines whether or not the specified range is within the map region.
572 bool IsWithinMapRegion(VAddr address, u64 size) const; 627 bool IsWithinMapRegion(VAddr address, u64 size) const;
573 628
574 /// Gets the base address of the new map region. 629 /// Gets the base address of the stack region.
575 VAddr GetNewMapRegionBaseAddress() const; 630 VAddr GetStackRegionBaseAddress() const;
576 631
577 /// Gets the end address of the new map region. 632 /// Gets the end address of the stack region.
578 VAddr GetNewMapRegionEndAddress() const; 633 VAddr GetStackRegionEndAddress() const;
579 634
580 /// Gets the total size of the new map region in bytes. 635 /// Gets the total size of the stack region in bytes.
581 u64 GetNewMapRegionSize() const; 636 u64 GetStackRegionSize() const;
582 637
583 /// Determines whether or not the given address range is within the new map region 638 /// Determines whether or not the given address range is within the stack region
584 bool IsWithinNewMapRegion(VAddr address, u64 size) const; 639 bool IsWithinStackRegion(VAddr address, u64 size) const;
585 640
586 /// Gets the base address of the TLS IO region. 641 /// Gets the base address of the TLS IO region.
587 VAddr GetTLSIORegionBaseAddress() const; 642 VAddr GetTLSIORegionBaseAddress() const;
@@ -632,6 +687,11 @@ private:
632 */ 687 */
633 VMAIter MergeAdjacent(VMAIter vma); 688 VMAIter MergeAdjacent(VMAIter vma);
634 689
690 /**
691 * Merges two adjacent VMAs.
692 */
693 void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right);
694
635 /// Updates the pages corresponding to this VMA so they match the VMA's attributes. 695 /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
636 void UpdatePageTableForVMA(const VirtualMemoryArea& vma); 696 void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
637 697
@@ -676,6 +736,13 @@ private:
676 MemoryAttribute attribute_mask, MemoryAttribute attribute, 736 MemoryAttribute attribute_mask, MemoryAttribute attribute,
677 MemoryAttribute ignore_mask) const; 737 MemoryAttribute ignore_mask) const;
678 738
739 /// Gets the amount of memory currently mapped (state != Unmapped) in a range.
740 ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const;
741
742 /// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range.
743 ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
744 std::size_t size) const;
745
679 /** 746 /**
680 * A map covering the entirety of the managed address space, keyed by the `base` field of each 747 * A map covering the entirety of the managed address space, keyed by the `base` field of each
681 * VMA. It must always be modified by splitting or merging VMAs, so that the invariant 748 * VMA. It must always be modified by splitting or merging VMAs, so that the invariant
@@ -701,8 +768,8 @@ private:
701 VAddr map_region_base = 0; 768 VAddr map_region_base = 0;
702 VAddr map_region_end = 0; 769 VAddr map_region_end = 0;
703 770
704 VAddr new_map_region_base = 0; 771 VAddr stack_region_base = 0;
705 VAddr new_map_region_end = 0; 772 VAddr stack_region_end = 0;
706 773
707 VAddr tls_io_region_base = 0; 774 VAddr tls_io_region_base = 0;
708 VAddr tls_io_region_end = 0; 775 VAddr tls_io_region_end = 0;
@@ -711,12 +778,17 @@ private:
711 // the entire virtual address space extents that bound the allocations, including any holes. 778 // the entire virtual address space extents that bound the allocations, including any holes.
712 // This makes deallocation and reallocation of holes fast and keeps process memory contiguous 779 // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
713 // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. 780 // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
714 std::shared_ptr<std::vector<u8>> heap_memory; 781 std::shared_ptr<PhysicalMemory> heap_memory;
715 782
716 // The end of the currently allocated heap. This is not an inclusive 783 // The end of the currently allocated heap. This is not an inclusive
717 // end of the range. This is essentially 'base_address + current_size'. 784 // end of the range. This is essentially 'base_address + current_size'.
718 VAddr heap_end = 0; 785 VAddr heap_end = 0;
719 786
787 // The current amount of memory mapped via MapPhysicalMemory.
788 // This is used here (and in Nintendo's kernel) only for debugging, and does not impact
789 // any behavior.
790 u64 physical_memory_mapped = 0;
791
720 Core::System& system; 792 Core::System& system;
721}; 793};
722} // namespace Kernel 794} // namespace Kernel
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 002efaa7a..a7c55e116 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -15,13 +15,18 @@
15#include "core/file_sys/control_metadata.h" 15#include "core/file_sys/control_metadata.h"
16#include "core/file_sys/patch_manager.h" 16#include "core/file_sys/patch_manager.h"
17#include "core/hle/ipc_helpers.h" 17#include "core/hle/ipc_helpers.h"
18#include "core/hle/kernel/kernel.h"
18#include "core/hle/kernel/process.h" 19#include "core/hle/kernel/process.h"
19#include "core/hle/service/acc/acc.h" 20#include "core/hle/service/acc/acc.h"
20#include "core/hle/service/acc/acc_aa.h" 21#include "core/hle/service/acc/acc_aa.h"
21#include "core/hle/service/acc/acc_su.h" 22#include "core/hle/service/acc/acc_su.h"
22#include "core/hle/service/acc/acc_u0.h" 23#include "core/hle/service/acc/acc_u0.h"
23#include "core/hle/service/acc/acc_u1.h" 24#include "core/hle/service/acc/acc_u1.h"
25#include "core/hle/service/acc/errors.h"
24#include "core/hle/service/acc/profile_manager.h" 26#include "core/hle/service/acc/profile_manager.h"
27#include "core/hle/service/glue/arp.h"
28#include "core/hle/service/glue/manager.h"
29#include "core/hle/service/sm/sm.h"
25#include "core/loader/loader.h" 30#include "core/loader/loader.h"
26 31
27namespace Service::Account { 32namespace Service::Account {
@@ -312,10 +317,72 @@ void Module::Interface::IsUserRegistrationRequestPermitted(Kernel::HLERequestCon
312 rb.Push(profile_manager->CanSystemRegisterUser()); 317 rb.Push(profile_manager->CanSystemRegisterUser());
313} 318}
314 319
315void Module::Interface::InitializeApplicationInfoOld(Kernel::HLERequestContext& ctx) { 320void Module::Interface::InitializeApplicationInfo(Kernel::HLERequestContext& ctx) {
316 LOG_WARNING(Service_ACC, "(STUBBED) called"); 321 IPC::RequestParser rp{ctx};
322 auto pid = rp.Pop<u64>();
323
324 LOG_DEBUG(Service_ACC, "called, process_id={}", pid);
317 IPC::ResponseBuilder rb{ctx, 2}; 325 IPC::ResponseBuilder rb{ctx, 2};
318 rb.Push(RESULT_SUCCESS); 326 rb.Push(InitializeApplicationInfoBase(pid));
327}
328
329void Module::Interface::InitializeApplicationInfoRestricted(Kernel::HLERequestContext& ctx) {
330 IPC::RequestParser rp{ctx};
331 auto pid = rp.Pop<u64>();
332
333 LOG_WARNING(Service_ACC, "(Partial implementation) called, process_id={}", pid);
334
335 // TODO(ogniK): We require checking if the user actually owns the title and what not. As of
336 // currently, we assume the user owns the title. InitializeApplicationInfoBase SHOULD be called
337 // first then we do extra checks if the game is a digital copy.
338
339 IPC::ResponseBuilder rb{ctx, 2};
340 rb.Push(InitializeApplicationInfoBase(pid));
341}
342
343ResultCode Module::Interface::InitializeApplicationInfoBase(u64 process_id) {
344 if (application_info) {
345 LOG_ERROR(Service_ACC, "Application already initialized");
346 return ERR_ACCOUNTINFO_ALREADY_INITIALIZED;
347 }
348
349 const auto& list = system.Kernel().GetProcessList();
350 const auto iter = std::find_if(list.begin(), list.end(), [&process_id](const auto& process) {
351 return process->GetProcessID() == process_id;
352 });
353
354 if (iter == list.end()) {
355 LOG_ERROR(Service_ACC, "Failed to find process ID");
356 application_info.application_type = ApplicationType::Unknown;
357
358 return ERR_ACCOUNTINFO_BAD_APPLICATION;
359 }
360
361 const auto launch_property = system.GetARPManager().GetLaunchProperty((*iter)->GetTitleID());
362
363 if (launch_property.Failed()) {
364 LOG_ERROR(Service_ACC, "Failed to get launch property");
365 return ERR_ACCOUNTINFO_BAD_APPLICATION;
366 }
367
368 switch (launch_property->base_game_storage_id) {
369 case FileSys::StorageId::GameCard:
370 application_info.application_type = ApplicationType::GameCard;
371 break;
372 case FileSys::StorageId::Host:
373 case FileSys::StorageId::NandUser:
374 case FileSys::StorageId::SdCard:
375 application_info.application_type = ApplicationType::Digital;
376 break;
377 default:
378 LOG_ERROR(Service_ACC, "Invalid game storage ID");
379 return ERR_ACCOUNTINFO_BAD_APPLICATION;
380 }
381
382 LOG_WARNING(Service_ACC, "ApplicationInfo init required");
383 // TODO(ogniK): Actual initalization here
384
385 return RESULT_SUCCESS;
319} 386}
320 387
321void Module::Interface::GetBaasAccountManagerForApplication(Kernel::HLERequestContext& ctx) { 388void Module::Interface::GetBaasAccountManagerForApplication(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h
index 69e4f34fc..7a7dc9ec6 100644
--- a/src/core/hle/service/acc/acc.h
+++ b/src/core/hle/service/acc/acc.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "core/hle/service/glue/manager.h"
7#include "core/hle/service/service.h" 8#include "core/hle/service/service.h"
8 9
9namespace Service::Account { 10namespace Service::Account {
@@ -25,13 +26,34 @@ public:
25 void ListOpenUsers(Kernel::HLERequestContext& ctx); 26 void ListOpenUsers(Kernel::HLERequestContext& ctx);
26 void GetLastOpenedUser(Kernel::HLERequestContext& ctx); 27 void GetLastOpenedUser(Kernel::HLERequestContext& ctx);
27 void GetProfile(Kernel::HLERequestContext& ctx); 28 void GetProfile(Kernel::HLERequestContext& ctx);
28 void InitializeApplicationInfoOld(Kernel::HLERequestContext& ctx); 29 void InitializeApplicationInfo(Kernel::HLERequestContext& ctx);
30 void InitializeApplicationInfoRestricted(Kernel::HLERequestContext& ctx);
29 void GetBaasAccountManagerForApplication(Kernel::HLERequestContext& ctx); 31 void GetBaasAccountManagerForApplication(Kernel::HLERequestContext& ctx);
30 void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx); 32 void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx);
31 void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx); 33 void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx);
32 void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx); 34 void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx);
33 void GetProfileEditor(Kernel::HLERequestContext& ctx); 35 void GetProfileEditor(Kernel::HLERequestContext& ctx);
34 36
37 private:
38 ResultCode InitializeApplicationInfoBase(u64 process_id);
39
40 enum class ApplicationType : u32_le {
41 GameCard = 0,
42 Digital = 1,
43 Unknown = 3,
44 };
45
46 struct ApplicationInfo {
47 Service::Glue::ApplicationLaunchProperty launch_property;
48 ApplicationType application_type;
49
50 constexpr explicit operator bool() const {
51 return launch_property.title_id != 0x0;
52 }
53 };
54
55 ApplicationInfo application_info{};
56
35 protected: 57 protected:
36 std::shared_ptr<Module> module; 58 std::shared_ptr<Module> module;
37 std::shared_ptr<ProfileManager> profile_manager; 59 std::shared_ptr<ProfileManager> profile_manager;
diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp
index 2f239e8c0..0ac19f4ff 100644
--- a/src/core/hle/service/acc/acc_u0.cpp
+++ b/src/core/hle/service/acc/acc_u0.cpp
@@ -22,7 +22,7 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
22 {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, 22 {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
23 {60, nullptr, "ListOpenContextStoredUsers"}, 23 {60, nullptr, "ListOpenContextStoredUsers"},
24 {99, nullptr, "DebugActivateOpenContextRetention"}, 24 {99, nullptr, "DebugActivateOpenContextRetention"},
25 {100, &ACC_U0::InitializeApplicationInfoOld, "InitializeApplicationInfoOld"}, 25 {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"},
26 {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"}, 26 {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},
27 {102, nullptr, "AuthenticateApplicationAsync"}, 27 {102, nullptr, "AuthenticateApplicationAsync"},
28 {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, 28 {103, nullptr, "CheckNetworkServiceAvailabilityAsync"},
@@ -31,7 +31,7 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
31 {120, nullptr, "CreateGuestLoginRequest"}, 31 {120, nullptr, "CreateGuestLoginRequest"},
32 {130, nullptr, "LoadOpenContext"}, 32 {130, nullptr, "LoadOpenContext"},
33 {131, nullptr, "ListOpenContextStoredUsers"}, 33 {131, nullptr, "ListOpenContextStoredUsers"},
34 {140, nullptr, "InitializeApplicationInfo"}, 34 {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"},
35 {141, nullptr, "ListQualifiedUsers"}, 35 {141, nullptr, "ListQualifiedUsers"},
36 {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, 36 {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"},
37 }; 37 };
diff --git a/src/core/hle/service/acc/errors.h b/src/core/hle/service/acc/errors.h
new file mode 100644
index 000000000..1f0577239
--- /dev/null
+++ b/src/core/hle/service/acc/errors.h
@@ -0,0 +1,14 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::Account {
10
11constexpr ResultCode ERR_ACCOUNTINFO_BAD_APPLICATION{ErrorModule::Account, 22};
12constexpr ResultCode ERR_ACCOUNTINFO_ALREADY_INITIALIZED{ErrorModule::Account, 41};
13
14} // namespace Service::Account
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 33cebb48b..aa2c83937 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -29,7 +29,8 @@
29#include "core/hle/service/am/omm.h" 29#include "core/hle/service/am/omm.h"
30#include "core/hle/service/am/spsm.h" 30#include "core/hle/service/am/spsm.h"
31#include "core/hle/service/am/tcap.h" 31#include "core/hle/service/am/tcap.h"
32#include "core/hle/service/apm/apm.h" 32#include "core/hle/service/apm/controller.h"
33#include "core/hle/service/apm/interface.h"
33#include "core/hle/service/filesystem/filesystem.h" 34#include "core/hle/service/filesystem/filesystem.h"
34#include "core/hle/service/ns/ns.h" 35#include "core/hle/service/ns/ns.h"
35#include "core/hle/service/nvflinger/nvflinger.h" 36#include "core/hle/service/nvflinger/nvflinger.h"
@@ -55,7 +56,8 @@ struct LaunchParameters {
55}; 56};
56static_assert(sizeof(LaunchParameters) == 0x88); 57static_assert(sizeof(LaunchParameters) == 0x88);
57 58
58IWindowController::IWindowController() : ServiceFramework("IWindowController") { 59IWindowController::IWindowController(Core::System& system_)
60 : ServiceFramework("IWindowController"), system{system_} {
59 // clang-format off 61 // clang-format off
60 static const FunctionInfo functions[] = { 62 static const FunctionInfo functions[] = {
61 {0, nullptr, "CreateWindow"}, 63 {0, nullptr, "CreateWindow"},
@@ -74,7 +76,7 @@ IWindowController::IWindowController() : ServiceFramework("IWindowController") {
74IWindowController::~IWindowController() = default; 76IWindowController::~IWindowController() = default;
75 77
76void IWindowController::GetAppletResourceUserId(Kernel::HLERequestContext& ctx) { 78void IWindowController::GetAppletResourceUserId(Kernel::HLERequestContext& ctx) {
77 const u64 process_id = Core::System::GetInstance().Kernel().CurrentProcess()->GetProcessID(); 79 const u64 process_id = system.CurrentProcess()->GetProcessID();
78 80
79 LOG_DEBUG(Service_AM, "called. Process ID=0x{:016X}", process_id); 81 LOG_DEBUG(Service_AM, "called. Process ID=0x{:016X}", process_id);
80 82
@@ -230,8 +232,9 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
230 232
231IDebugFunctions::~IDebugFunctions() = default; 233IDebugFunctions::~IDebugFunctions() = default;
232 234
233ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger) 235ISelfController::ISelfController(Core::System& system_,
234 : ServiceFramework("ISelfController"), nvflinger(std::move(nvflinger)) { 236 std::shared_ptr<NVFlinger::NVFlinger> nvflinger_)
237 : ServiceFramework("ISelfController"), nvflinger(std::move(nvflinger_)) {
235 // clang-format off 238 // clang-format off
236 static const FunctionInfo functions[] = { 239 static const FunctionInfo functions[] = {
237 {0, nullptr, "Exit"}, 240 {0, nullptr, "Exit"},
@@ -265,12 +268,12 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
265 {65, nullptr, "ReportUserIsActive"}, 268 {65, nullptr, "ReportUserIsActive"},
266 {66, nullptr, "GetCurrentIlluminance"}, 269 {66, nullptr, "GetCurrentIlluminance"},
267 {67, nullptr, "IsIlluminanceAvailable"}, 270 {67, nullptr, "IsIlluminanceAvailable"},
268 {68, nullptr, "SetAutoSleepDisabled"}, 271 {68, &ISelfController::SetAutoSleepDisabled, "SetAutoSleepDisabled"},
269 {69, nullptr, "IsAutoSleepDisabled"}, 272 {69, &ISelfController::IsAutoSleepDisabled, "IsAutoSleepDisabled"},
270 {70, nullptr, "ReportMultimediaError"}, 273 {70, nullptr, "ReportMultimediaError"},
271 {71, nullptr, "GetCurrentIlluminanceEx"}, 274 {71, nullptr, "GetCurrentIlluminanceEx"},
272 {80, nullptr, "SetWirelessPriorityMode"}, 275 {80, nullptr, "SetWirelessPriorityMode"},
273 {90, nullptr, "GetAccumulatedSuspendedTickValue"}, 276 {90, &ISelfController::GetAccumulatedSuspendedTickValue, "GetAccumulatedSuspendedTickValue"},
274 {91, &ISelfController::GetAccumulatedSuspendedTickChangedEvent, "GetAccumulatedSuspendedTickChangedEvent"}, 277 {91, &ISelfController::GetAccumulatedSuspendedTickChangedEvent, "GetAccumulatedSuspendedTickChangedEvent"},
275 {100, nullptr, "SetAlbumImageTakenNotificationEnabled"}, 278 {100, nullptr, "SetAlbumImageTakenNotificationEnabled"},
276 {1000, nullptr, "GetDebugStorageChannel"}, 279 {1000, nullptr, "GetDebugStorageChannel"},
@@ -279,14 +282,18 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
279 282
280 RegisterHandlers(functions); 283 RegisterHandlers(functions);
281 284
282 auto& kernel = Core::System::GetInstance().Kernel(); 285 auto& kernel = system_.Kernel();
283 launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, 286 launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
284 "ISelfController:LaunchableEvent"); 287 "ISelfController:LaunchableEvent");
285 288
286 // TODO(ogniK): Figure out where, when and why this event gets signalled 289 // This event is created by AM on the first time GetAccumulatedSuspendedTickChangedEvent() is
290 // called. Yuzu can just create it unconditionally, since it doesn't need to support multiple
291 // ISelfControllers. The event is signaled on creation, and on transition from suspended -> not
292 // suspended if the event has previously been created by a call to
293 // GetAccumulatedSuspendedTickChangedEvent.
287 accumulated_suspended_tick_changed_event = Kernel::WritableEvent::CreateEventPair( 294 accumulated_suspended_tick_changed_event = Kernel::WritableEvent::CreateEventPair(
288 kernel, Kernel::ResetType::Manual, "ISelfController:AccumulatedSuspendedTickChangedEvent"); 295 kernel, Kernel::ResetType::Manual, "ISelfController:AccumulatedSuspendedTickChangedEvent");
289 accumulated_suspended_tick_changed_event.writable->Signal(); // Is signalled on creation 296 accumulated_suspended_tick_changed_event.writable->Signal();
290} 297}
291 298
292ISelfController::~ISelfController() = default; 299ISelfController::~ISelfController() = default;
@@ -449,19 +456,54 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c
449 rb.Push<u32>(idle_time_detection_extension); 456 rb.Push<u32>(idle_time_detection_extension);
450} 457}
451 458
459void ISelfController::SetAutoSleepDisabled(Kernel::HLERequestContext& ctx) {
460 IPC::RequestParser rp{ctx};
461 is_auto_sleep_disabled = rp.Pop<bool>();
462
463 // On the system itself, if the previous state of is_auto_sleep_disabled
464 // differed from the current value passed in, it'd signify the internal
465 // window manager to update (and also increment some statistics like update counts)
466 //
467 // It'd also indicate this change to an idle handling context.
468 //
469 // However, given we're emulating this behavior, most of this can be ignored
470 // and it's sufficient to simply set the member variable for querying via
471 // IsAutoSleepDisabled().
472
473 LOG_DEBUG(Service_AM, "called. is_auto_sleep_disabled={}", is_auto_sleep_disabled);
474
475 IPC::ResponseBuilder rb{ctx, 2};
476 rb.Push(RESULT_SUCCESS);
477}
478
479void ISelfController::IsAutoSleepDisabled(Kernel::HLERequestContext& ctx) {
480 LOG_DEBUG(Service_AM, "called.");
481
482 IPC::ResponseBuilder rb{ctx, 3};
483 rb.Push(RESULT_SUCCESS);
484 rb.Push(is_auto_sleep_disabled);
485}
486
487void ISelfController::GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx) {
488 LOG_DEBUG(Service_AM, "called.");
489
490 // This command returns the total number of system ticks since ISelfController creation
491 // where the game was suspended. Since Yuzu doesn't implement game suspension, this command
492 // can just always return 0 ticks.
493 IPC::ResponseBuilder rb{ctx, 4};
494 rb.Push(RESULT_SUCCESS);
495 rb.Push<u64>(0);
496}
497
452void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx) { 498void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx) {
453 // The implementation of this function is fine as is, the reason we're labelling it as stubbed 499 LOG_DEBUG(Service_AM, "called.");
454 // is because we're currently unsure when and where accumulated_suspended_tick_changed_event is
455 // actually signalled for the time being.
456 LOG_WARNING(Service_AM, "(STUBBED) called");
457 500
458 IPC::ResponseBuilder rb{ctx, 2, 1}; 501 IPC::ResponseBuilder rb{ctx, 2, 1};
459 rb.Push(RESULT_SUCCESS); 502 rb.Push(RESULT_SUCCESS);
460 rb.PushCopyObjects(accumulated_suspended_tick_changed_event.readable); 503 rb.PushCopyObjects(accumulated_suspended_tick_changed_event.readable);
461} 504}
462 505
463AppletMessageQueue::AppletMessageQueue() { 506AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) {
464 auto& kernel = Core::System::GetInstance().Kernel();
465 on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, 507 on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
466 "AMMessageQueue:OnMessageRecieved"); 508 "AMMessageQueue:OnMessageRecieved");
467 on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair( 509 on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair(
@@ -508,8 +550,9 @@ void AppletMessageQueue::OperationModeChanged() {
508 on_operation_mode_changed.writable->Signal(); 550 on_operation_mode_changed.writable->Signal();
509} 551}
510 552
511ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_queue) 553ICommonStateGetter::ICommonStateGetter(Core::System& system,
512 : ServiceFramework("ICommonStateGetter"), msg_queue(std::move(msg_queue)) { 554 std::shared_ptr<AppletMessageQueue> msg_queue)
555 : ServiceFramework("ICommonStateGetter"), system(system), msg_queue(std::move(msg_queue)) {
513 // clang-format off 556 // clang-format off
514 static const FunctionInfo functions[] = { 557 static const FunctionInfo functions[] = {
515 {0, &ICommonStateGetter::GetEventHandle, "GetEventHandle"}, 558 {0, &ICommonStateGetter::GetEventHandle, "GetEventHandle"},
@@ -542,7 +585,7 @@ ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_q
542 {63, nullptr, "GetHdcpAuthenticationStateChangeEvent"}, 585 {63, nullptr, "GetHdcpAuthenticationStateChangeEvent"},
543 {64, nullptr, "SetTvPowerStateMatchingMode"}, 586 {64, nullptr, "SetTvPowerStateMatchingMode"},
544 {65, nullptr, "GetApplicationIdByContentActionName"}, 587 {65, nullptr, "GetApplicationIdByContentActionName"},
545 {66, nullptr, "SetCpuBoostMode"}, 588 {66, &ICommonStateGetter::SetCpuBoostMode, "SetCpuBoostMode"},
546 {80, nullptr, "PerformSystemButtonPressingIfInFocus"}, 589 {80, nullptr, "PerformSystemButtonPressingIfInFocus"},
547 {90, nullptr, "SetPerformanceConfigurationChangedNotification"}, 590 {90, nullptr, "SetPerformanceConfigurationChangedNotification"},
548 {91, nullptr, "GetCurrentPerformanceConfiguration"}, 591 {91, nullptr, "GetCurrentPerformanceConfiguration"},
@@ -623,6 +666,16 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext&
623 } 666 }
624} 667}
625 668
669void ICommonStateGetter::SetCpuBoostMode(Kernel::HLERequestContext& ctx) {
670 LOG_DEBUG(Service_AM, "called, forwarding to APM:SYS");
671
672 const auto& sm = system.ServiceManager();
673 const auto apm_sys = sm.GetService<APM::APM_Sys>("apm:sys");
674 ASSERT(apm_sys != nullptr);
675
676 apm_sys->SetCpuBoostMode(ctx);
677}
678
626IStorage::IStorage(std::vector<u8> buffer) 679IStorage::IStorage(std::vector<u8> buffer)
627 : ServiceFramework("IStorage"), buffer(std::move(buffer)) { 680 : ServiceFramework("IStorage"), buffer(std::move(buffer)) {
628 // clang-format off 681 // clang-format off
@@ -651,13 +704,11 @@ void ICommonStateGetter::GetOperationMode(Kernel::HLERequestContext& ctx) {
651} 704}
652 705
653void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) { 706void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
654 const bool use_docked_mode{Settings::values.use_docked_mode}; 707 LOG_DEBUG(Service_AM, "called");
655 LOG_DEBUG(Service_AM, "called, use_docked_mode={}", use_docked_mode);
656 708
657 IPC::ResponseBuilder rb{ctx, 3}; 709 IPC::ResponseBuilder rb{ctx, 3};
658 rb.Push(RESULT_SUCCESS); 710 rb.Push(RESULT_SUCCESS);
659 rb.Push(static_cast<u32>(use_docked_mode ? APM::PerformanceMode::Docked 711 rb.PushEnum(system.GetAPMController().GetCurrentPerformanceMode());
660 : APM::PerformanceMode::Handheld));
661} 712}
662 713
663class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> { 714class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> {
@@ -887,9 +938,8 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
887 rb.Push(RESULT_SUCCESS); 938 rb.Push(RESULT_SUCCESS);
888} 939}
889 940
890ILibraryAppletCreator::ILibraryAppletCreator(u64 current_process_title_id) 941ILibraryAppletCreator::ILibraryAppletCreator(Core::System& system_)
891 : ServiceFramework("ILibraryAppletCreator"), 942 : ServiceFramework("ILibraryAppletCreator"), system{system_} {
892 current_process_title_id(current_process_title_id) {
893 static const FunctionInfo functions[] = { 943 static const FunctionInfo functions[] = {
894 {0, &ILibraryAppletCreator::CreateLibraryApplet, "CreateLibraryApplet"}, 944 {0, &ILibraryAppletCreator::CreateLibraryApplet, "CreateLibraryApplet"},
895 {1, nullptr, "TerminateAllLibraryApplets"}, 945 {1, nullptr, "TerminateAllLibraryApplets"},
@@ -911,8 +961,8 @@ void ILibraryAppletCreator::CreateLibraryApplet(Kernel::HLERequestContext& ctx)
911 LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}", 961 LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}",
912 static_cast<u32>(applet_id), applet_mode); 962 static_cast<u32>(applet_id), applet_mode);
913 963
914 const auto& applet_manager{Core::System::GetInstance().GetAppletManager()}; 964 const auto& applet_manager{system.GetAppletManager()};
915 const auto applet = applet_manager.GetApplet(applet_id, current_process_title_id); 965 const auto applet = applet_manager.GetApplet(applet_id);
916 966
917 if (applet == nullptr) { 967 if (applet == nullptr) {
918 LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", static_cast<u32>(applet_id)); 968 LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", static_cast<u32>(applet_id));
@@ -949,8 +999,7 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
949 const auto handle{rp.Pop<Kernel::Handle>()}; 999 const auto handle{rp.Pop<Kernel::Handle>()};
950 1000
951 const auto transfer_mem = 1001 const auto transfer_mem =
952 Core::System::GetInstance().CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>( 1002 system.CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(handle);
953 handle);
954 1003
955 if (transfer_mem == nullptr) { 1004 if (transfer_mem == nullptr) {
956 LOG_ERROR(Service_AM, "shared_mem is a nullpr for handle={:08X}", handle); 1005 LOG_ERROR(Service_AM, "shared_mem is a nullpr for handle={:08X}", handle);
@@ -968,7 +1017,8 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
968 rb.PushIpcInterface(std::make_shared<IStorage>(std::move(memory))); 1017 rb.PushIpcInterface(std::make_shared<IStorage>(std::move(memory)));
969} 1018}
970 1019
971IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationFunctions") { 1020IApplicationFunctions::IApplicationFunctions(Core::System& system_)
1021 : ServiceFramework("IApplicationFunctions"), system{system_} {
972 // clang-format off 1022 // clang-format off
973 static const FunctionInfo functions[] = { 1023 static const FunctionInfo functions[] = {
974 {1, &IApplicationFunctions::PopLaunchParameter, "PopLaunchParameter"}, 1024 {1, &IApplicationFunctions::PopLaunchParameter, "PopLaunchParameter"},
@@ -1007,6 +1057,7 @@ IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationF
1007 {120, nullptr, "ExecuteProgram"}, 1057 {120, nullptr, "ExecuteProgram"},
1008 {121, nullptr, "ClearUserChannel"}, 1058 {121, nullptr, "ClearUserChannel"},
1009 {122, nullptr, "UnpopToUserChannel"}, 1059 {122, nullptr, "UnpopToUserChannel"},
1060 {130, &IApplicationFunctions::GetGpuErrorDetectedSystemEvent, "GetGpuErrorDetectedSystemEvent"},
1010 {500, nullptr, "StartContinuousRecordingFlushForDebug"}, 1061 {500, nullptr, "StartContinuousRecordingFlushForDebug"},
1011 {1000, nullptr, "CreateMovieMaker"}, 1062 {1000, nullptr, "CreateMovieMaker"},
1012 {1001, nullptr, "PrepareForJit"}, 1063 {1001, nullptr, "PrepareForJit"},
@@ -1014,6 +1065,10 @@ IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationF
1014 // clang-format on 1065 // clang-format on
1015 1066
1016 RegisterHandlers(functions); 1067 RegisterHandlers(functions);
1068
1069 auto& kernel = Core::System::GetInstance().Kernel();
1070 gpu_error_detected_event = Kernel::WritableEvent::CreateEventPair(
1071 kernel, Kernel::ResetType::Manual, "IApplicationFunctions:GpuErrorDetectedSystemEvent");
1017} 1072}
1018 1073
1019IApplicationFunctions::~IApplicationFunctions() = default; 1074IApplicationFunctions::~IApplicationFunctions() = default;
@@ -1125,7 +1180,7 @@ void IApplicationFunctions::GetDesiredLanguage(Kernel::HLERequestContext& ctx) {
1125 // Get supported languages from NACP, if possible 1180 // Get supported languages from NACP, if possible
1126 // Default to 0 (all languages supported) 1181 // Default to 0 (all languages supported)
1127 u32 supported_languages = 0; 1182 u32 supported_languages = 0;
1128 FileSys::PatchManager pm{Core::System::GetInstance().CurrentProcess()->GetTitleID()}; 1183 FileSys::PatchManager pm{system.CurrentProcess()->GetTitleID()};
1129 1184
1130 const auto res = pm.GetControlMetadata(); 1185 const auto res = pm.GetControlMetadata();
1131 if (res.first != nullptr) { 1186 if (res.first != nullptr) {
@@ -1133,8 +1188,8 @@ void IApplicationFunctions::GetDesiredLanguage(Kernel::HLERequestContext& ctx) {
1133 } 1188 }
1134 1189
1135 // Call IApplicationManagerInterface implementation. 1190 // Call IApplicationManagerInterface implementation.
1136 auto& service_manager = Core::System::GetInstance().ServiceManager(); 1191 auto& service_manager = system.ServiceManager();
1137 auto ns_am2 = service_manager.GetService<Service::NS::NS>("ns:am2"); 1192 auto ns_am2 = service_manager.GetService<NS::NS>("ns:am2");
1138 auto app_man = ns_am2->GetApplicationManagerInterface(); 1193 auto app_man = ns_am2->GetApplicationManagerInterface();
1139 1194
1140 // Get desired application language 1195 // Get desired application language
@@ -1206,8 +1261,8 @@ void IApplicationFunctions::ExtendSaveData(Kernel::HLERequestContext& ctx) {
1206 "new_journal={:016X}", 1261 "new_journal={:016X}",
1207 static_cast<u8>(type), user_id[1], user_id[0], new_normal_size, new_journal_size); 1262 static_cast<u8>(type), user_id[1], user_id[0], new_normal_size, new_journal_size);
1208 1263
1209 FileSystem::WriteSaveDataSize(type, Core::CurrentProcess()->GetTitleID(), user_id, 1264 const auto title_id = system.CurrentProcess()->GetTitleID();
1210 {new_normal_size, new_journal_size}); 1265 FileSystem::WriteSaveDataSize(type, title_id, user_id, {new_normal_size, new_journal_size});
1211 1266
1212 IPC::ResponseBuilder rb{ctx, 4}; 1267 IPC::ResponseBuilder rb{ctx, 4};
1213 rb.Push(RESULT_SUCCESS); 1268 rb.Push(RESULT_SUCCESS);
@@ -1226,8 +1281,8 @@ void IApplicationFunctions::GetSaveDataSize(Kernel::HLERequestContext& ctx) {
1226 LOG_DEBUG(Service_AM, "called with type={:02X}, user_id={:016X}{:016X}", static_cast<u8>(type), 1281 LOG_DEBUG(Service_AM, "called with type={:02X}, user_id={:016X}{:016X}", static_cast<u8>(type),
1227 user_id[1], user_id[0]); 1282 user_id[1], user_id[0]);
1228 1283
1229 const auto size = 1284 const auto title_id = system.CurrentProcess()->GetTitleID();
1230 FileSystem::ReadSaveDataSize(type, Core::CurrentProcess()->GetTitleID(), user_id); 1285 const auto size = FileSystem::ReadSaveDataSize(type, title_id, user_id);
1231 1286
1232 IPC::ResponseBuilder rb{ctx, 6}; 1287 IPC::ResponseBuilder rb{ctx, 6};
1233 rb.Push(RESULT_SUCCESS); 1288 rb.Push(RESULT_SUCCESS);
@@ -1235,11 +1290,19 @@ void IApplicationFunctions::GetSaveDataSize(Kernel::HLERequestContext& ctx) {
1235 rb.Push(size.journal); 1290 rb.Push(size.journal);
1236} 1291}
1237 1292
1293void IApplicationFunctions::GetGpuErrorDetectedSystemEvent(Kernel::HLERequestContext& ctx) {
1294 LOG_WARNING(Service_AM, "(STUBBED) called");
1295
1296 IPC::ResponseBuilder rb{ctx, 2, 1};
1297 rb.Push(RESULT_SUCCESS);
1298 rb.PushCopyObjects(gpu_error_detected_event.readable);
1299}
1300
1238void InstallInterfaces(SM::ServiceManager& service_manager, 1301void InstallInterfaces(SM::ServiceManager& service_manager,
1239 std::shared_ptr<NVFlinger::NVFlinger> nvflinger, Core::System& system) { 1302 std::shared_ptr<NVFlinger::NVFlinger> nvflinger, Core::System& system) {
1240 auto message_queue = std::make_shared<AppletMessageQueue>(); 1303 auto message_queue = std::make_shared<AppletMessageQueue>(system.Kernel());
1241 message_queue->PushMessage(AppletMessageQueue::AppletMessage::FocusStateChanged); // Needed on 1304 // Needed on game boot
1242 // game boot 1305 message_queue->PushMessage(AppletMessageQueue::AppletMessage::FocusStateChanged);
1243 1306
1244 std::make_shared<AppletAE>(nvflinger, message_queue, system)->InstallAsService(service_manager); 1307 std::make_shared<AppletAE>(nvflinger, message_queue, system)->InstallAsService(service_manager);
1245 std::make_shared<AppletOE>(nvflinger, message_queue, system)->InstallAsService(service_manager); 1308 std::make_shared<AppletOE>(nvflinger, message_queue, system)->InstallAsService(service_manager);
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 4ea609d23..28f870302 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -10,12 +10,15 @@
10#include "core/hle/kernel/writable_event.h" 10#include "core/hle/kernel/writable_event.h"
11#include "core/hle/service/service.h" 11#include "core/hle/service/service.h"
12 12
13namespace Service { 13namespace Kernel {
14namespace NVFlinger { 14class KernelCore;
15}
16
17namespace Service::NVFlinger {
15class NVFlinger; 18class NVFlinger;
16} 19}
17 20
18namespace AM { 21namespace Service::AM {
19 22
20enum SystemLanguage { 23enum SystemLanguage {
21 Japanese = 0, 24 Japanese = 0,
@@ -47,7 +50,7 @@ public:
47 PerformanceModeChanged = 31, 50 PerformanceModeChanged = 31,
48 }; 51 };
49 52
50 AppletMessageQueue(); 53 explicit AppletMessageQueue(Kernel::KernelCore& kernel);
51 ~AppletMessageQueue(); 54 ~AppletMessageQueue();
52 55
53 const Kernel::SharedPtr<Kernel::ReadableEvent>& GetMesssageRecieveEvent() const; 56 const Kernel::SharedPtr<Kernel::ReadableEvent>& GetMesssageRecieveEvent() const;
@@ -65,12 +68,14 @@ private:
65 68
66class IWindowController final : public ServiceFramework<IWindowController> { 69class IWindowController final : public ServiceFramework<IWindowController> {
67public: 70public:
68 IWindowController(); 71 explicit IWindowController(Core::System& system_);
69 ~IWindowController() override; 72 ~IWindowController() override;
70 73
71private: 74private:
72 void GetAppletResourceUserId(Kernel::HLERequestContext& ctx); 75 void GetAppletResourceUserId(Kernel::HLERequestContext& ctx);
73 void AcquireForegroundRights(Kernel::HLERequestContext& ctx); 76 void AcquireForegroundRights(Kernel::HLERequestContext& ctx);
77
78 Core::System& system;
74}; 79};
75 80
76class IAudioController final : public ServiceFramework<IAudioController> { 81class IAudioController final : public ServiceFramework<IAudioController> {
@@ -113,7 +118,8 @@ public:
113 118
114class ISelfController final : public ServiceFramework<ISelfController> { 119class ISelfController final : public ServiceFramework<ISelfController> {
115public: 120public:
116 explicit ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger); 121 explicit ISelfController(Core::System& system_,
122 std::shared_ptr<NVFlinger::NVFlinger> nvflinger_);
117 ~ISelfController() override; 123 ~ISelfController() override;
118 124
119private: 125private:
@@ -133,6 +139,9 @@ private:
133 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); 139 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx);
134 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 140 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
135 void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 141 void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
142 void SetAutoSleepDisabled(Kernel::HLERequestContext& ctx);
143 void IsAutoSleepDisabled(Kernel::HLERequestContext& ctx);
144 void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx);
136 void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx); 145 void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx);
137 146
138 std::shared_ptr<NVFlinger::NVFlinger> nvflinger; 147 std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
@@ -141,11 +150,13 @@ private:
141 150
142 u32 idle_time_detection_extension = 0; 151 u32 idle_time_detection_extension = 0;
143 u64 num_fatal_sections_entered = 0; 152 u64 num_fatal_sections_entered = 0;
153 bool is_auto_sleep_disabled = false;
144}; 154};
145 155
146class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { 156class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> {
147public: 157public:
148 explicit ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_queue); 158 explicit ICommonStateGetter(Core::System& system,
159 std::shared_ptr<AppletMessageQueue> msg_queue);
149 ~ICommonStateGetter() override; 160 ~ICommonStateGetter() override;
150 161
151private: 162private:
@@ -167,7 +178,9 @@ private:
167 void GetPerformanceMode(Kernel::HLERequestContext& ctx); 178 void GetPerformanceMode(Kernel::HLERequestContext& ctx);
168 void GetBootMode(Kernel::HLERequestContext& ctx); 179 void GetBootMode(Kernel::HLERequestContext& ctx);
169 void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx); 180 void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx);
181 void SetCpuBoostMode(Kernel::HLERequestContext& ctx);
170 182
183 Core::System& system;
171 std::shared_ptr<AppletMessageQueue> msg_queue; 184 std::shared_ptr<AppletMessageQueue> msg_queue;
172}; 185};
173 186
@@ -201,7 +214,7 @@ private:
201 214
202class ILibraryAppletCreator final : public ServiceFramework<ILibraryAppletCreator> { 215class ILibraryAppletCreator final : public ServiceFramework<ILibraryAppletCreator> {
203public: 216public:
204 ILibraryAppletCreator(u64 current_process_title_id); 217 explicit ILibraryAppletCreator(Core::System& system_);
205 ~ILibraryAppletCreator() override; 218 ~ILibraryAppletCreator() override;
206 219
207private: 220private:
@@ -209,12 +222,12 @@ private:
209 void CreateStorage(Kernel::HLERequestContext& ctx); 222 void CreateStorage(Kernel::HLERequestContext& ctx);
210 void CreateTransferMemoryStorage(Kernel::HLERequestContext& ctx); 223 void CreateTransferMemoryStorage(Kernel::HLERequestContext& ctx);
211 224
212 u64 current_process_title_id; 225 Core::System& system;
213}; 226};
214 227
215class IApplicationFunctions final : public ServiceFramework<IApplicationFunctions> { 228class IApplicationFunctions final : public ServiceFramework<IApplicationFunctions> {
216public: 229public:
217 IApplicationFunctions(); 230 explicit IApplicationFunctions(Core::System& system_);
218 ~IApplicationFunctions() override; 231 ~IApplicationFunctions() override;
219 232
220private: 233private:
@@ -235,6 +248,10 @@ private:
235 void BeginBlockingHomeButton(Kernel::HLERequestContext& ctx); 248 void BeginBlockingHomeButton(Kernel::HLERequestContext& ctx);
236 void EndBlockingHomeButton(Kernel::HLERequestContext& ctx); 249 void EndBlockingHomeButton(Kernel::HLERequestContext& ctx);
237 void EnableApplicationCrashReport(Kernel::HLERequestContext& ctx); 250 void EnableApplicationCrashReport(Kernel::HLERequestContext& ctx);
251 void GetGpuErrorDetectedSystemEvent(Kernel::HLERequestContext& ctx);
252
253 Kernel::EventPair gpu_error_detected_event;
254 Core::System& system;
238}; 255};
239 256
240class IHomeMenuFunctions final : public ServiceFramework<IHomeMenuFunctions> { 257class IHomeMenuFunctions final : public ServiceFramework<IHomeMenuFunctions> {
@@ -268,5 +285,4 @@ public:
268void InstallInterfaces(SM::ServiceManager& service_manager, 285void InstallInterfaces(SM::ServiceManager& service_manager,
269 std::shared_ptr<NVFlinger::NVFlinger> nvflinger, Core::System& system); 286 std::shared_ptr<NVFlinger::NVFlinger> nvflinger, Core::System& system);
270 287
271} // namespace AM 288} // namespace Service::AM
272} // namespace Service
diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp
index fe5beb8f9..e454b77d8 100644
--- a/src/core/hle/service/am/applet_ae.cpp
+++ b/src/core/hle/service/am/applet_ae.cpp
@@ -42,7 +42,7 @@ private:
42 42
43 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 43 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
44 rb.Push(RESULT_SUCCESS); 44 rb.Push(RESULT_SUCCESS);
45 rb.PushIpcInterface<ICommonStateGetter>(msg_queue); 45 rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue);
46 } 46 }
47 47
48 void GetSelfController(Kernel::HLERequestContext& ctx) { 48 void GetSelfController(Kernel::HLERequestContext& ctx) {
@@ -50,7 +50,7 @@ private:
50 50
51 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 51 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
52 rb.Push(RESULT_SUCCESS); 52 rb.Push(RESULT_SUCCESS);
53 rb.PushIpcInterface<ISelfController>(nvflinger); 53 rb.PushIpcInterface<ISelfController>(system, nvflinger);
54 } 54 }
55 55
56 void GetWindowController(Kernel::HLERequestContext& ctx) { 56 void GetWindowController(Kernel::HLERequestContext& ctx) {
@@ -58,7 +58,7 @@ private:
58 58
59 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 59 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
60 rb.Push(RESULT_SUCCESS); 60 rb.Push(RESULT_SUCCESS);
61 rb.PushIpcInterface<IWindowController>(); 61 rb.PushIpcInterface<IWindowController>(system);
62 } 62 }
63 63
64 void GetAudioController(Kernel::HLERequestContext& ctx) { 64 void GetAudioController(Kernel::HLERequestContext& ctx) {
@@ -98,7 +98,7 @@ private:
98 98
99 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 99 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
100 rb.Push(RESULT_SUCCESS); 100 rb.Push(RESULT_SUCCESS);
101 rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID()); 101 rb.PushIpcInterface<ILibraryAppletCreator>(system);
102 } 102 }
103 103
104 void GetApplicationFunctions(Kernel::HLERequestContext& ctx) { 104 void GetApplicationFunctions(Kernel::HLERequestContext& ctx) {
@@ -106,7 +106,7 @@ private:
106 106
107 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 107 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
108 rb.Push(RESULT_SUCCESS); 108 rb.Push(RESULT_SUCCESS);
109 rb.PushIpcInterface<IApplicationFunctions>(); 109 rb.PushIpcInterface<IApplicationFunctions>(system);
110 } 110 }
111 111
112 std::shared_ptr<NVFlinger::NVFlinger> nvflinger; 112 std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
@@ -146,7 +146,7 @@ private:
146 146
147 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 147 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
148 rb.Push(RESULT_SUCCESS); 148 rb.Push(RESULT_SUCCESS);
149 rb.PushIpcInterface<ICommonStateGetter>(msg_queue); 149 rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue);
150 } 150 }
151 151
152 void GetSelfController(Kernel::HLERequestContext& ctx) { 152 void GetSelfController(Kernel::HLERequestContext& ctx) {
@@ -154,7 +154,7 @@ private:
154 154
155 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 155 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
156 rb.Push(RESULT_SUCCESS); 156 rb.Push(RESULT_SUCCESS);
157 rb.PushIpcInterface<ISelfController>(nvflinger); 157 rb.PushIpcInterface<ISelfController>(system, nvflinger);
158 } 158 }
159 159
160 void GetWindowController(Kernel::HLERequestContext& ctx) { 160 void GetWindowController(Kernel::HLERequestContext& ctx) {
@@ -162,7 +162,7 @@ private:
162 162
163 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 163 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
164 rb.Push(RESULT_SUCCESS); 164 rb.Push(RESULT_SUCCESS);
165 rb.PushIpcInterface<IWindowController>(); 165 rb.PushIpcInterface<IWindowController>(system);
166 } 166 }
167 167
168 void GetAudioController(Kernel::HLERequestContext& ctx) { 168 void GetAudioController(Kernel::HLERequestContext& ctx) {
@@ -194,7 +194,7 @@ private:
194 194
195 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 195 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
196 rb.Push(RESULT_SUCCESS); 196 rb.Push(RESULT_SUCCESS);
197 rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID()); 197 rb.PushIpcInterface<ILibraryAppletCreator>(system);
198 } 198 }
199 199
200 void GetHomeMenuFunctions(Kernel::HLERequestContext& ctx) { 200 void GetHomeMenuFunctions(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/am/applet_oe.cpp b/src/core/hle/service/am/applet_oe.cpp
index 6e255fe95..a2ffaa440 100644
--- a/src/core/hle/service/am/applet_oe.cpp
+++ b/src/core/hle/service/am/applet_oe.cpp
@@ -4,7 +4,6 @@
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/hle/ipc_helpers.h" 6#include "core/hle/ipc_helpers.h"
7#include "core/hle/kernel/process.h"
8#include "core/hle/service/am/am.h" 7#include "core/hle/service/am/am.h"
9#include "core/hle/service/am/applet_oe.h" 8#include "core/hle/service/am/applet_oe.h"
10#include "core/hle/service/nvflinger/nvflinger.h" 9#include "core/hle/service/nvflinger/nvflinger.h"
@@ -64,7 +63,7 @@ private:
64 63
65 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 64 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
66 rb.Push(RESULT_SUCCESS); 65 rb.Push(RESULT_SUCCESS);
67 rb.PushIpcInterface<IWindowController>(); 66 rb.PushIpcInterface<IWindowController>(system);
68 } 67 }
69 68
70 void GetSelfController(Kernel::HLERequestContext& ctx) { 69 void GetSelfController(Kernel::HLERequestContext& ctx) {
@@ -72,7 +71,7 @@ private:
72 71
73 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 72 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
74 rb.Push(RESULT_SUCCESS); 73 rb.Push(RESULT_SUCCESS);
75 rb.PushIpcInterface<ISelfController>(nvflinger); 74 rb.PushIpcInterface<ISelfController>(system, nvflinger);
76 } 75 }
77 76
78 void GetCommonStateGetter(Kernel::HLERequestContext& ctx) { 77 void GetCommonStateGetter(Kernel::HLERequestContext& ctx) {
@@ -80,7 +79,7 @@ private:
80 79
81 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 80 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
82 rb.Push(RESULT_SUCCESS); 81 rb.Push(RESULT_SUCCESS);
83 rb.PushIpcInterface<ICommonStateGetter>(msg_queue); 82 rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue);
84 } 83 }
85 84
86 void GetLibraryAppletCreator(Kernel::HLERequestContext& ctx) { 85 void GetLibraryAppletCreator(Kernel::HLERequestContext& ctx) {
@@ -88,7 +87,7 @@ private:
88 87
89 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 88 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
90 rb.Push(RESULT_SUCCESS); 89 rb.Push(RESULT_SUCCESS);
91 rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID()); 90 rb.PushIpcInterface<ILibraryAppletCreator>(system);
92 } 91 }
93 92
94 void GetApplicationFunctions(Kernel::HLERequestContext& ctx) { 93 void GetApplicationFunctions(Kernel::HLERequestContext& ctx) {
@@ -96,7 +95,7 @@ private:
96 95
97 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 96 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
98 rb.Push(RESULT_SUCCESS); 97 rb.Push(RESULT_SUCCESS);
99 rb.PushIpcInterface<IApplicationFunctions>(); 98 rb.PushIpcInterface<IApplicationFunctions>(system);
100 } 99 }
101 100
102 std::shared_ptr<NVFlinger::NVFlinger> nvflinger; 101 std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp
index 6bdba2468..d2e35362f 100644
--- a/src/core/hle/service/am/applets/applets.cpp
+++ b/src/core/hle/service/am/applets/applets.cpp
@@ -23,8 +23,7 @@
23 23
24namespace Service::AM::Applets { 24namespace Service::AM::Applets {
25 25
26AppletDataBroker::AppletDataBroker() { 26AppletDataBroker::AppletDataBroker(Kernel::KernelCore& kernel) {
27 auto& kernel = Core::System::GetInstance().Kernel();
28 state_changed_event = Kernel::WritableEvent::CreateEventPair( 27 state_changed_event = Kernel::WritableEvent::CreateEventPair(
29 kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent"); 28 kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent");
30 pop_out_data_event = Kernel::WritableEvent::CreateEventPair( 29 pop_out_data_event = Kernel::WritableEvent::CreateEventPair(
@@ -121,7 +120,7 @@ Kernel::SharedPtr<Kernel::ReadableEvent> AppletDataBroker::GetStateChangedEvent(
121 return state_changed_event.readable; 120 return state_changed_event.readable;
122} 121}
123 122
124Applet::Applet() = default; 123Applet::Applet(Kernel::KernelCore& kernel_) : broker{kernel_} {}
125 124
126Applet::~Applet() = default; 125Applet::~Applet() = default;
127 126
@@ -154,7 +153,7 @@ AppletFrontendSet::AppletFrontendSet(AppletFrontendSet&&) noexcept = default;
154 153
155AppletFrontendSet& AppletFrontendSet::operator=(AppletFrontendSet&&) noexcept = default; 154AppletFrontendSet& AppletFrontendSet::operator=(AppletFrontendSet&&) noexcept = default;
156 155
157AppletManager::AppletManager() = default; 156AppletManager::AppletManager(Core::System& system_) : system{system_} {}
158 157
159AppletManager::~AppletManager() = default; 158AppletManager::~AppletManager() = default;
160 159
@@ -216,28 +215,28 @@ void AppletManager::ClearAll() {
216 frontend = {}; 215 frontend = {};
217} 216}
218 217
219std::shared_ptr<Applet> AppletManager::GetApplet(AppletId id, u64 current_process_title_id) const { 218std::shared_ptr<Applet> AppletManager::GetApplet(AppletId id) const {
220 switch (id) { 219 switch (id) {
221 case AppletId::Auth: 220 case AppletId::Auth:
222 return std::make_shared<Auth>(*frontend.parental_controls); 221 return std::make_shared<Auth>(system, *frontend.parental_controls);
223 case AppletId::Error: 222 case AppletId::Error:
224 return std::make_shared<Error>(*frontend.error); 223 return std::make_shared<Error>(system, *frontend.error);
225 case AppletId::ProfileSelect: 224 case AppletId::ProfileSelect:
226 return std::make_shared<ProfileSelect>(*frontend.profile_select); 225 return std::make_shared<ProfileSelect>(system, *frontend.profile_select);
227 case AppletId::SoftwareKeyboard: 226 case AppletId::SoftwareKeyboard:
228 return std::make_shared<SoftwareKeyboard>(*frontend.software_keyboard); 227 return std::make_shared<SoftwareKeyboard>(system, *frontend.software_keyboard);
229 case AppletId::PhotoViewer: 228 case AppletId::PhotoViewer:
230 return std::make_shared<PhotoViewer>(*frontend.photo_viewer); 229 return std::make_shared<PhotoViewer>(system, *frontend.photo_viewer);
231 case AppletId::LibAppletShop: 230 case AppletId::LibAppletShop:
232 return std::make_shared<WebBrowser>(*frontend.web_browser, current_process_title_id, 231 return std::make_shared<WebBrowser>(system, *frontend.web_browser,
233 frontend.e_commerce.get()); 232 frontend.e_commerce.get());
234 case AppletId::LibAppletOff: 233 case AppletId::LibAppletOff:
235 return std::make_shared<WebBrowser>(*frontend.web_browser, current_process_title_id); 234 return std::make_shared<WebBrowser>(system, *frontend.web_browser);
236 default: 235 default:
237 UNIMPLEMENTED_MSG( 236 UNIMPLEMENTED_MSG(
238 "No backend implementation exists for applet_id={:02X}! Falling back to stub applet.", 237 "No backend implementation exists for applet_id={:02X}! Falling back to stub applet.",
239 static_cast<u8>(id)); 238 static_cast<u8>(id));
240 return std::make_shared<StubApplet>(id); 239 return std::make_shared<StubApplet>(system, id);
241 } 240 }
242} 241}
243 242
diff --git a/src/core/hle/service/am/applets/applets.h b/src/core/hle/service/am/applets/applets.h
index adc973dad..764c3418c 100644
--- a/src/core/hle/service/am/applets/applets.h
+++ b/src/core/hle/service/am/applets/applets.h
@@ -12,6 +12,10 @@
12 12
13union ResultCode; 13union ResultCode;
14 14
15namespace Core {
16class System;
17}
18
15namespace Core::Frontend { 19namespace Core::Frontend {
16class ECommerceApplet; 20class ECommerceApplet;
17class ErrorApplet; 21class ErrorApplet;
@@ -22,6 +26,10 @@ class SoftwareKeyboardApplet;
22class WebBrowserApplet; 26class WebBrowserApplet;
23} // namespace Core::Frontend 27} // namespace Core::Frontend
24 28
29namespace Kernel {
30class KernelCore;
31}
32
25namespace Service::AM { 33namespace Service::AM {
26 34
27class IStorage; 35class IStorage;
@@ -53,7 +61,7 @@ enum class AppletId : u32 {
53 61
54class AppletDataBroker final { 62class AppletDataBroker final {
55public: 63public:
56 AppletDataBroker(); 64 explicit AppletDataBroker(Kernel::KernelCore& kernel_);
57 ~AppletDataBroker(); 65 ~AppletDataBroker();
58 66
59 struct RawChannelData { 67 struct RawChannelData {
@@ -108,7 +116,7 @@ private:
108 116
109class Applet { 117class Applet {
110public: 118public:
111 Applet(); 119 explicit Applet(Kernel::KernelCore& kernel_);
112 virtual ~Applet(); 120 virtual ~Applet();
113 121
114 virtual void Initialize(); 122 virtual void Initialize();
@@ -179,7 +187,7 @@ struct AppletFrontendSet {
179 187
180class AppletManager { 188class AppletManager {
181public: 189public:
182 AppletManager(); 190 explicit AppletManager(Core::System& system_);
183 ~AppletManager(); 191 ~AppletManager();
184 192
185 void SetAppletFrontendSet(AppletFrontendSet set); 193 void SetAppletFrontendSet(AppletFrontendSet set);
@@ -187,10 +195,11 @@ public:
187 void SetDefaultAppletsIfMissing(); 195 void SetDefaultAppletsIfMissing();
188 void ClearAll(); 196 void ClearAll();
189 197
190 std::shared_ptr<Applet> GetApplet(AppletId id, u64 current_process_title_id) const; 198 std::shared_ptr<Applet> GetApplet(AppletId id) const;
191 199
192private: 200private:
193 AppletFrontendSet frontend; 201 AppletFrontendSet frontend;
202 Core::System& system;
194}; 203};
195 204
196} // namespace Applets 205} // namespace Applets
diff --git a/src/core/hle/service/am/applets/error.cpp b/src/core/hle/service/am/applets/error.cpp
index af3a900f8..a7db26725 100644
--- a/src/core/hle/service/am/applets/error.cpp
+++ b/src/core/hle/service/am/applets/error.cpp
@@ -85,7 +85,8 @@ ResultCode Decode64BitError(u64 error) {
85 85
86} // Anonymous namespace 86} // Anonymous namespace
87 87
88Error::Error(const Core::Frontend::ErrorApplet& frontend) : frontend(frontend) {} 88Error::Error(Core::System& system_, const Core::Frontend::ErrorApplet& frontend_)
89 : Applet{system_.Kernel()}, frontend(frontend_), system{system_} {}
89 90
90Error::~Error() = default; 91Error::~Error() = default;
91 92
@@ -145,8 +146,8 @@ void Error::Execute() {
145 } 146 }
146 147
147 const auto callback = [this] { DisplayCompleted(); }; 148 const auto callback = [this] { DisplayCompleted(); };
148 const auto title_id = Core::CurrentProcess()->GetTitleID(); 149 const auto title_id = system.CurrentProcess()->GetTitleID();
149 const auto& reporter{Core::System::GetInstance().GetReporter()}; 150 const auto& reporter{system.GetReporter()};
150 151
151 switch (mode) { 152 switch (mode) {
152 case ErrorAppletMode::ShowError: 153 case ErrorAppletMode::ShowError:
diff --git a/src/core/hle/service/am/applets/error.h b/src/core/hle/service/am/applets/error.h
index a3590d181..a105cdb0c 100644
--- a/src/core/hle/service/am/applets/error.h
+++ b/src/core/hle/service/am/applets/error.h
@@ -7,6 +7,10 @@
7#include "core/hle/result.h" 7#include "core/hle/result.h"
8#include "core/hle/service/am/applets/applets.h" 8#include "core/hle/service/am/applets/applets.h"
9 9
10namespace Core {
11class System;
12}
13
10namespace Service::AM::Applets { 14namespace Service::AM::Applets {
11 15
12enum class ErrorAppletMode : u8 { 16enum class ErrorAppletMode : u8 {
@@ -21,7 +25,7 @@ enum class ErrorAppletMode : u8 {
21 25
22class Error final : public Applet { 26class Error final : public Applet {
23public: 27public:
24 explicit Error(const Core::Frontend::ErrorApplet& frontend); 28 explicit Error(Core::System& system_, const Core::Frontend::ErrorApplet& frontend_);
25 ~Error() override; 29 ~Error() override;
26 30
27 void Initialize() override; 31 void Initialize() override;
@@ -42,6 +46,7 @@ private:
42 std::unique_ptr<ErrorArguments> args; 46 std::unique_ptr<ErrorArguments> args;
43 47
44 bool complete = false; 48 bool complete = false;
49 Core::System& system;
45}; 50};
46 51
47} // namespace Service::AM::Applets 52} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/am/applets/general_backend.cpp b/src/core/hle/service/am/applets/general_backend.cpp
index e0def8dff..328438a1d 100644
--- a/src/core/hle/service/am/applets/general_backend.cpp
+++ b/src/core/hle/service/am/applets/general_backend.cpp
@@ -37,7 +37,8 @@ static void LogCurrentStorage(AppletDataBroker& broker, std::string_view prefix)
37 } 37 }
38} 38}
39 39
40Auth::Auth(Core::Frontend::ParentalControlsApplet& frontend) : frontend(frontend) {} 40Auth::Auth(Core::System& system_, Core::Frontend::ParentalControlsApplet& frontend_)
41 : Applet{system_.Kernel()}, frontend(frontend_) {}
41 42
42Auth::~Auth() = default; 43Auth::~Auth() = default;
43 44
@@ -151,7 +152,8 @@ void Auth::AuthFinished(bool successful) {
151 broker.SignalStateChanged(); 152 broker.SignalStateChanged();
152} 153}
153 154
154PhotoViewer::PhotoViewer(const Core::Frontend::PhotoViewerApplet& frontend) : frontend(frontend) {} 155PhotoViewer::PhotoViewer(Core::System& system_, const Core::Frontend::PhotoViewerApplet& frontend_)
156 : Applet{system_.Kernel()}, frontend(frontend_), system{system_} {}
155 157
156PhotoViewer::~PhotoViewer() = default; 158PhotoViewer::~PhotoViewer() = default;
157 159
@@ -185,7 +187,7 @@ void PhotoViewer::Execute() {
185 const auto callback = [this] { ViewFinished(); }; 187 const auto callback = [this] { ViewFinished(); };
186 switch (mode) { 188 switch (mode) {
187 case PhotoViewerAppletMode::CurrentApp: 189 case PhotoViewerAppletMode::CurrentApp:
188 frontend.ShowPhotosForApplication(Core::CurrentProcess()->GetTitleID(), callback); 190 frontend.ShowPhotosForApplication(system.CurrentProcess()->GetTitleID(), callback);
189 break; 191 break;
190 case PhotoViewerAppletMode::AllApps: 192 case PhotoViewerAppletMode::AllApps:
191 frontend.ShowAllPhotos(callback); 193 frontend.ShowAllPhotos(callback);
@@ -200,7 +202,8 @@ void PhotoViewer::ViewFinished() {
200 broker.SignalStateChanged(); 202 broker.SignalStateChanged();
201} 203}
202 204
203StubApplet::StubApplet(AppletId id) : id(id) {} 205StubApplet::StubApplet(Core::System& system_, AppletId id_)
206 : Applet{system_.Kernel()}, id(id_), system{system_} {}
204 207
205StubApplet::~StubApplet() = default; 208StubApplet::~StubApplet() = default;
206 209
@@ -209,7 +212,7 @@ void StubApplet::Initialize() {
209 Applet::Initialize(); 212 Applet::Initialize();
210 213
211 const auto data = broker.PeekDataToAppletForDebug(); 214 const auto data = broker.PeekDataToAppletForDebug();
212 Core::System::GetInstance().GetReporter().SaveUnimplementedAppletReport( 215 system.GetReporter().SaveUnimplementedAppletReport(
213 static_cast<u32>(id), common_args.arguments_version, common_args.library_version, 216 static_cast<u32>(id), common_args.arguments_version, common_args.library_version,
214 common_args.theme_color, common_args.play_startup_sound, common_args.system_tick, 217 common_args.theme_color, common_args.play_startup_sound, common_args.system_tick,
215 data.normal, data.interactive); 218 data.normal, data.interactive);
diff --git a/src/core/hle/service/am/applets/general_backend.h b/src/core/hle/service/am/applets/general_backend.h
index 0da252044..cfa2df369 100644
--- a/src/core/hle/service/am/applets/general_backend.h
+++ b/src/core/hle/service/am/applets/general_backend.h
@@ -6,6 +6,10 @@
6 6
7#include "core/hle/service/am/applets/applets.h" 7#include "core/hle/service/am/applets/applets.h"
8 8
9namespace Core {
10class System;
11}
12
9namespace Service::AM::Applets { 13namespace Service::AM::Applets {
10 14
11enum class AuthAppletType : u32 { 15enum class AuthAppletType : u32 {
@@ -16,7 +20,7 @@ enum class AuthAppletType : u32 {
16 20
17class Auth final : public Applet { 21class Auth final : public Applet {
18public: 22public:
19 explicit Auth(Core::Frontend::ParentalControlsApplet& frontend); 23 explicit Auth(Core::System& system_, Core::Frontend::ParentalControlsApplet& frontend_);
20 ~Auth() override; 24 ~Auth() override;
21 25
22 void Initialize() override; 26 void Initialize() override;
@@ -45,7 +49,7 @@ enum class PhotoViewerAppletMode : u8 {
45 49
46class PhotoViewer final : public Applet { 50class PhotoViewer final : public Applet {
47public: 51public:
48 explicit PhotoViewer(const Core::Frontend::PhotoViewerApplet& frontend); 52 explicit PhotoViewer(Core::System& system_, const Core::Frontend::PhotoViewerApplet& frontend_);
49 ~PhotoViewer() override; 53 ~PhotoViewer() override;
50 54
51 void Initialize() override; 55 void Initialize() override;
@@ -60,11 +64,12 @@ private:
60 const Core::Frontend::PhotoViewerApplet& frontend; 64 const Core::Frontend::PhotoViewerApplet& frontend;
61 bool complete = false; 65 bool complete = false;
62 PhotoViewerAppletMode mode = PhotoViewerAppletMode::CurrentApp; 66 PhotoViewerAppletMode mode = PhotoViewerAppletMode::CurrentApp;
67 Core::System& system;
63}; 68};
64 69
65class StubApplet final : public Applet { 70class StubApplet final : public Applet {
66public: 71public:
67 explicit StubApplet(AppletId id); 72 explicit StubApplet(Core::System& system_, AppletId id_);
68 ~StubApplet() override; 73 ~StubApplet() override;
69 74
70 void Initialize() override; 75 void Initialize() override;
@@ -76,6 +81,7 @@ public:
76 81
77private: 82private:
78 AppletId id; 83 AppletId id;
84 Core::System& system;
79}; 85};
80 86
81} // namespace Service::AM::Applets 87} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/am/applets/profile_select.cpp b/src/core/hle/service/am/applets/profile_select.cpp
index 57b5419e8..3eba696ca 100644
--- a/src/core/hle/service/am/applets/profile_select.cpp
+++ b/src/core/hle/service/am/applets/profile_select.cpp
@@ -15,8 +15,9 @@ namespace Service::AM::Applets {
15 15
16constexpr ResultCode ERR_USER_CANCELLED_SELECTION{ErrorModule::Account, 1}; 16constexpr ResultCode ERR_USER_CANCELLED_SELECTION{ErrorModule::Account, 1};
17 17
18ProfileSelect::ProfileSelect(const Core::Frontend::ProfileSelectApplet& frontend) 18ProfileSelect::ProfileSelect(Core::System& system_,
19 : frontend(frontend) {} 19 const Core::Frontend::ProfileSelectApplet& frontend_)
20 : Applet{system_.Kernel()}, frontend(frontend_) {}
20 21
21ProfileSelect::~ProfileSelect() = default; 22ProfileSelect::~ProfileSelect() = default;
22 23
diff --git a/src/core/hle/service/am/applets/profile_select.h b/src/core/hle/service/am/applets/profile_select.h
index 563cd744a..16364ead7 100644
--- a/src/core/hle/service/am/applets/profile_select.h
+++ b/src/core/hle/service/am/applets/profile_select.h
@@ -11,6 +11,10 @@
11#include "core/hle/result.h" 11#include "core/hle/result.h"
12#include "core/hle/service/am/applets/applets.h" 12#include "core/hle/service/am/applets/applets.h"
13 13
14namespace Core {
15class System;
16}
17
14namespace Service::AM::Applets { 18namespace Service::AM::Applets {
15 19
16struct UserSelectionConfig { 20struct UserSelectionConfig {
@@ -29,7 +33,8 @@ static_assert(sizeof(UserSelectionOutput) == 0x18, "UserSelectionOutput has inco
29 33
30class ProfileSelect final : public Applet { 34class ProfileSelect final : public Applet {
31public: 35public:
32 explicit ProfileSelect(const Core::Frontend::ProfileSelectApplet& frontend); 36 explicit ProfileSelect(Core::System& system_,
37 const Core::Frontend::ProfileSelectApplet& frontend_);
33 ~ProfileSelect() override; 38 ~ProfileSelect() override;
34 39
35 void Initialize() override; 40 void Initialize() override;
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index e197990f7..748559cd0 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -39,8 +39,9 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters(
39 return params; 39 return params;
40} 40}
41 41
42SoftwareKeyboard::SoftwareKeyboard(const Core::Frontend::SoftwareKeyboardApplet& frontend) 42SoftwareKeyboard::SoftwareKeyboard(Core::System& system_,
43 : frontend(frontend) {} 43 const Core::Frontend::SoftwareKeyboardApplet& frontend_)
44 : Applet{system_.Kernel()}, frontend(frontend_) {}
44 45
45SoftwareKeyboard::~SoftwareKeyboard() = default; 46SoftwareKeyboard::~SoftwareKeyboard() = default;
46 47
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
index 0fbc43e51..ef4801fc6 100644
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -16,6 +16,10 @@
16 16
17union ResultCode; 17union ResultCode;
18 18
19namespace Core {
20class System;
21}
22
19namespace Service::AM::Applets { 23namespace Service::AM::Applets {
20 24
21enum class KeysetDisable : u32 { 25enum class KeysetDisable : u32 {
@@ -55,7 +59,8 @@ static_assert(sizeof(KeyboardConfig) == 0x3E0, "KeyboardConfig has incorrect siz
55 59
56class SoftwareKeyboard final : public Applet { 60class SoftwareKeyboard final : public Applet {
57public: 61public:
58 explicit SoftwareKeyboard(const Core::Frontend::SoftwareKeyboardApplet& frontend); 62 explicit SoftwareKeyboard(Core::System& system_,
63 const Core::Frontend::SoftwareKeyboardApplet& frontend_);
59 ~SoftwareKeyboard() override; 64 ~SoftwareKeyboard() override;
60 65
61 void Initialize() override; 66 void Initialize() override;
diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp
index f3c9fef0e..32283e819 100644
--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ b/src/core/hle/service/am/applets/web_browser.cpp
@@ -190,8 +190,9 @@ std::map<WebArgTLVType, std::vector<u8>> GetWebArguments(const std::vector<u8>&
190 return out; 190 return out;
191} 191}
192 192
193FileSys::VirtualFile GetApplicationRomFS(u64 title_id, FileSys::ContentRecordType type) { 193FileSys::VirtualFile GetApplicationRomFS(const Core::System& system, u64 title_id,
194 const auto& installed{Core::System::GetInstance().GetContentProvider()}; 194 FileSys::ContentRecordType type) {
195 const auto& installed{system.GetContentProvider()};
195 const auto res = installed.GetEntry(title_id, type); 196 const auto res = installed.GetEntry(title_id, type);
196 197
197 if (res != nullptr) { 198 if (res != nullptr) {
@@ -207,10 +208,10 @@ FileSys::VirtualFile GetApplicationRomFS(u64 title_id, FileSys::ContentRecordTyp
207 208
208} // Anonymous namespace 209} // Anonymous namespace
209 210
210WebBrowser::WebBrowser(Core::Frontend::WebBrowserApplet& frontend, u64 current_process_title_id, 211WebBrowser::WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_,
211 Core::Frontend::ECommerceApplet* frontend_e_commerce) 212 Core::Frontend::ECommerceApplet* frontend_e_commerce_)
212 : frontend(frontend), frontend_e_commerce(frontend_e_commerce), 213 : Applet{system_.Kernel()}, frontend(frontend_),
213 current_process_title_id(current_process_title_id) {} 214 frontend_e_commerce(frontend_e_commerce_), system{system_} {}
214 215
215WebBrowser::~WebBrowser() = default; 216WebBrowser::~WebBrowser() = default;
216 217
@@ -266,7 +267,7 @@ void WebBrowser::UnpackRomFS() {
266 ASSERT(offline_romfs != nullptr); 267 ASSERT(offline_romfs != nullptr);
267 const auto dir = 268 const auto dir =
268 FileSys::ExtractRomFS(offline_romfs, FileSys::RomFSExtractionType::SingleDiscard); 269 FileSys::ExtractRomFS(offline_romfs, FileSys::RomFSExtractionType::SingleDiscard);
269 const auto& vfs{Core::System::GetInstance().GetFilesystem()}; 270 const auto& vfs{system.GetFilesystem()};
270 const auto temp_dir = vfs->CreateDirectory(temporary_dir, FileSys::Mode::ReadWrite); 271 const auto temp_dir = vfs->CreateDirectory(temporary_dir, FileSys::Mode::ReadWrite);
271 FileSys::VfsRawCopyD(dir, temp_dir); 272 FileSys::VfsRawCopyD(dir, temp_dir);
272 273
@@ -470,10 +471,10 @@ void WebBrowser::InitializeOffline() {
470 } 471 }
471 472
472 if (title_id == 0) { 473 if (title_id == 0) {
473 title_id = current_process_title_id; 474 title_id = system.CurrentProcess()->GetTitleID();
474 } 475 }
475 476
476 offline_romfs = GetApplicationRomFS(title_id, type); 477 offline_romfs = GetApplicationRomFS(system, title_id, type);
477 if (offline_romfs == nullptr) { 478 if (offline_romfs == nullptr) {
478 status = ResultCode(-1); 479 status = ResultCode(-1);
479 LOG_ERROR(Service_AM, "Failed to find offline data for request!"); 480 LOG_ERROR(Service_AM, "Failed to find offline data for request!");
diff --git a/src/core/hle/service/am/applets/web_browser.h b/src/core/hle/service/am/applets/web_browser.h
index 870f57b64..8d4027411 100644
--- a/src/core/hle/service/am/applets/web_browser.h
+++ b/src/core/hle/service/am/applets/web_browser.h
@@ -9,6 +9,10 @@
9#include "core/hle/service/am/am.h" 9#include "core/hle/service/am/am.h"
10#include "core/hle/service/am/applets/applets.h" 10#include "core/hle/service/am/applets/applets.h"
11 11
12namespace Core {
13class System;
14}
15
12namespace Service::AM::Applets { 16namespace Service::AM::Applets {
13 17
14enum class ShimKind : u32; 18enum class ShimKind : u32;
@@ -17,8 +21,8 @@ enum class WebArgTLVType : u16;
17 21
18class WebBrowser final : public Applet { 22class WebBrowser final : public Applet {
19public: 23public:
20 WebBrowser(Core::Frontend::WebBrowserApplet& frontend, u64 current_process_title_id, 24 WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_,
21 Core::Frontend::ECommerceApplet* frontend_e_commerce = nullptr); 25 Core::Frontend::ECommerceApplet* frontend_e_commerce_ = nullptr);
22 26
23 ~WebBrowser() override; 27 ~WebBrowser() override;
24 28
@@ -59,8 +63,6 @@ private:
59 bool unpacked = false; 63 bool unpacked = false;
60 ResultCode status = RESULT_SUCCESS; 64 ResultCode status = RESULT_SUCCESS;
61 65
62 u64 current_process_title_id;
63
64 ShimKind kind; 66 ShimKind kind;
65 std::map<WebArgTLVType, std::vector<u8>> args; 67 std::map<WebArgTLVType, std::vector<u8>> args;
66 68
@@ -74,6 +76,8 @@ private:
74 std::optional<u128> user_id; 76 std::optional<u128> user_id;
75 std::optional<bool> shop_full_display; 77 std::optional<bool> shop_full_display;
76 std::string shop_extra_parameter; 78 std::string shop_extra_parameter;
79
80 Core::System& system;
77}; 81};
78 82
79} // namespace Service::AM::Applets 83} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/apm/apm.cpp b/src/core/hle/service/apm/apm.cpp
index f3c09bbb1..85bbf5988 100644
--- a/src/core/hle/service/apm/apm.cpp
+++ b/src/core/hle/service/apm/apm.cpp
@@ -2,7 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/ipc_helpers.h" 5#include "core/hle/ipc_helpers.h"
7#include "core/hle/service/apm/apm.h" 6#include "core/hle/service/apm/apm.h"
8#include "core/hle/service/apm/interface.h" 7#include "core/hle/service/apm/interface.h"
@@ -12,11 +11,15 @@ namespace Service::APM {
12Module::Module() = default; 11Module::Module() = default;
13Module::~Module() = default; 12Module::~Module() = default;
14 13
15void InstallInterfaces(SM::ServiceManager& service_manager) { 14void InstallInterfaces(Core::System& system) {
16 auto module_ = std::make_shared<Module>(); 15 auto module_ = std::make_shared<Module>();
17 std::make_shared<APM>(module_, "apm")->InstallAsService(service_manager); 16 std::make_shared<APM>(module_, system.GetAPMController(), "apm")
18 std::make_shared<APM>(module_, "apm:p")->InstallAsService(service_manager); 17 ->InstallAsService(system.ServiceManager());
19 std::make_shared<APM_Sys>()->InstallAsService(service_manager); 18 std::make_shared<APM>(module_, system.GetAPMController(), "apm:p")
19 ->InstallAsService(system.ServiceManager());
20 std::make_shared<APM>(module_, system.GetAPMController(), "apm:am")
21 ->InstallAsService(system.ServiceManager());
22 std::make_shared<APM_Sys>(system.GetAPMController())->InstallAsService(system.ServiceManager());
20} 23}
21 24
22} // namespace Service::APM 25} // namespace Service::APM
diff --git a/src/core/hle/service/apm/apm.h b/src/core/hle/service/apm/apm.h
index 4d7d5bb7c..cf4c2bb11 100644
--- a/src/core/hle/service/apm/apm.h
+++ b/src/core/hle/service/apm/apm.h
@@ -8,11 +8,6 @@
8 8
9namespace Service::APM { 9namespace Service::APM {
10 10
11enum class PerformanceMode : u8 {
12 Handheld = 0,
13 Docked = 1,
14};
15
16class Module final { 11class Module final {
17public: 12public:
18 Module(); 13 Module();
@@ -20,6 +15,6 @@ public:
20}; 15};
21 16
22/// Registers all AM services with the specified service manager. 17/// Registers all AM services with the specified service manager.
23void InstallInterfaces(SM::ServiceManager& service_manager); 18void InstallInterfaces(Core::System& system);
24 19
25} // namespace Service::APM 20} // namespace Service::APM
diff --git a/src/core/hle/service/apm/controller.cpp b/src/core/hle/service/apm/controller.cpp
new file mode 100644
index 000000000..4376612eb
--- /dev/null
+++ b/src/core/hle/service/apm/controller.cpp
@@ -0,0 +1,68 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/logging/log.h"
6#include "core/core_timing.h"
7#include "core/hle/service/apm/controller.h"
8#include "core/settings.h"
9
10namespace Service::APM {
11
12constexpr PerformanceConfiguration DEFAULT_PERFORMANCE_CONFIGURATION =
13 PerformanceConfiguration::Config7;
14
15Controller::Controller(Core::Timing::CoreTiming& core_timing)
16 : core_timing(core_timing), configs{
17 {PerformanceMode::Handheld, DEFAULT_PERFORMANCE_CONFIGURATION},
18 {PerformanceMode::Docked, DEFAULT_PERFORMANCE_CONFIGURATION},
19 } {}
20
21Controller::~Controller() = default;
22
23void Controller::SetPerformanceConfiguration(PerformanceMode mode,
24 PerformanceConfiguration config) {
25 static const std::map<PerformanceConfiguration, u32> PCONFIG_TO_SPEED_MAP{
26 {PerformanceConfiguration::Config1, 1020}, {PerformanceConfiguration::Config2, 1020},
27 {PerformanceConfiguration::Config3, 1224}, {PerformanceConfiguration::Config4, 1020},
28 {PerformanceConfiguration::Config5, 1020}, {PerformanceConfiguration::Config6, 1224},
29 {PerformanceConfiguration::Config7, 1020}, {PerformanceConfiguration::Config8, 1020},
30 {PerformanceConfiguration::Config9, 1020}, {PerformanceConfiguration::Config10, 1020},
31 {PerformanceConfiguration::Config11, 1020}, {PerformanceConfiguration::Config12, 1020},
32 {PerformanceConfiguration::Config13, 1785}, {PerformanceConfiguration::Config14, 1785},
33 {PerformanceConfiguration::Config15, 1020}, {PerformanceConfiguration::Config16, 1020},
34 };
35
36 SetClockSpeed(PCONFIG_TO_SPEED_MAP.find(config)->second);
37 configs.insert_or_assign(mode, config);
38}
39
40void Controller::SetFromCpuBoostMode(CpuBoostMode mode) {
41 constexpr std::array<PerformanceConfiguration, 3> BOOST_MODE_TO_CONFIG_MAP{{
42 PerformanceConfiguration::Config7,
43 PerformanceConfiguration::Config13,
44 PerformanceConfiguration::Config15,
45 }};
46
47 SetPerformanceConfiguration(PerformanceMode::Docked,
48 BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode)));
49}
50
51PerformanceMode Controller::GetCurrentPerformanceMode() {
52 return Settings::values.use_docked_mode ? PerformanceMode::Docked : PerformanceMode::Handheld;
53}
54
55PerformanceConfiguration Controller::GetCurrentPerformanceConfiguration(PerformanceMode mode) {
56 if (configs.find(mode) == configs.end()) {
57 configs.insert_or_assign(mode, DEFAULT_PERFORMANCE_CONFIGURATION);
58 }
59
60 return configs[mode];
61}
62
63void Controller::SetClockSpeed(u32 mhz) {
64 LOG_INFO(Service_APM, "called, mhz={:08X}", mhz);
65 // TODO(DarkLordZach): Actually signal core_timing to change clock speed.
66}
67
68} // namespace Service::APM
diff --git a/src/core/hle/service/apm/controller.h b/src/core/hle/service/apm/controller.h
new file mode 100644
index 000000000..8ac80eaea
--- /dev/null
+++ b/src/core/hle/service/apm/controller.h
@@ -0,0 +1,70 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include "common/common_types.h"
9
10namespace Core::Timing {
11class CoreTiming;
12}
13
14namespace Service::APM {
15
16enum class PerformanceConfiguration : u32 {
17 Config1 = 0x00010000,
18 Config2 = 0x00010001,
19 Config3 = 0x00010002,
20 Config4 = 0x00020000,
21 Config5 = 0x00020001,
22 Config6 = 0x00020002,
23 Config7 = 0x00020003,
24 Config8 = 0x00020004,
25 Config9 = 0x00020005,
26 Config10 = 0x00020006,
27 Config11 = 0x92220007,
28 Config12 = 0x92220008,
29 Config13 = 0x92220009,
30 Config14 = 0x9222000A,
31 Config15 = 0x9222000B,
32 Config16 = 0x9222000C,
33};
34
35enum class CpuBoostMode : u32 {
36 Disabled = 0,
37 Full = 1, // CPU + GPU -> Config 13, 14, 15, or 16
38 Partial = 2, // GPU Only -> Config 15 or 16
39};
40
41enum class PerformanceMode : u8 {
42 Handheld = 0,
43 Docked = 1,
44};
45
46// Class to manage the state and change of the emulated system performance.
47// Specifically, this deals with PerformanceMode, which corresponds to the system being docked or
48// undocked, and PerformanceConfig which specifies the exact CPU, GPU, and Memory clocks to operate
49// at. Additionally, this manages 'Boost Mode', which allows games to temporarily overclock the
50// system during times of high load -- this simply maps to different PerformanceConfigs to use.
51class Controller {
52public:
53 Controller(Core::Timing::CoreTiming& core_timing);
54 ~Controller();
55
56 void SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config);
57 void SetFromCpuBoostMode(CpuBoostMode mode);
58
59 PerformanceMode GetCurrentPerformanceMode();
60 PerformanceConfiguration GetCurrentPerformanceConfiguration(PerformanceMode mode);
61
62private:
63 void SetClockSpeed(u32 mhz);
64
65 std::map<PerformanceMode, PerformanceConfiguration> configs;
66
67 Core::Timing::CoreTiming& core_timing;
68};
69
70} // namespace Service::APM
diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp
index d058c0245..06f0f8edd 100644
--- a/src/core/hle/service/apm/interface.cpp
+++ b/src/core/hle/service/apm/interface.cpp
@@ -5,43 +5,32 @@
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/hle/ipc_helpers.h" 6#include "core/hle/ipc_helpers.h"
7#include "core/hle/service/apm/apm.h" 7#include "core/hle/service/apm/apm.h"
8#include "core/hle/service/apm/controller.h"
8#include "core/hle/service/apm/interface.h" 9#include "core/hle/service/apm/interface.h"
9 10
10namespace Service::APM { 11namespace Service::APM {
11 12
12class ISession final : public ServiceFramework<ISession> { 13class ISession final : public ServiceFramework<ISession> {
13public: 14public:
14 ISession() : ServiceFramework("ISession") { 15 ISession(Controller& controller) : ServiceFramework("ISession"), controller(controller) {
15 static const FunctionInfo functions[] = { 16 static const FunctionInfo functions[] = {
16 {0, &ISession::SetPerformanceConfiguration, "SetPerformanceConfiguration"}, 17 {0, &ISession::SetPerformanceConfiguration, "SetPerformanceConfiguration"},
17 {1, &ISession::GetPerformanceConfiguration, "GetPerformanceConfiguration"}, 18 {1, &ISession::GetPerformanceConfiguration, "GetPerformanceConfiguration"},
19 {2, nullptr, "SetCpuOverclockEnabled"},
18 }; 20 };
19 RegisterHandlers(functions); 21 RegisterHandlers(functions);
20 } 22 }
21 23
22private: 24private:
23 enum class PerformanceConfiguration : u32 {
24 Config1 = 0x00010000,
25 Config2 = 0x00010001,
26 Config3 = 0x00010002,
27 Config4 = 0x00020000,
28 Config5 = 0x00020001,
29 Config6 = 0x00020002,
30 Config7 = 0x00020003,
31 Config8 = 0x00020004,
32 Config9 = 0x00020005,
33 Config10 = 0x00020006,
34 Config11 = 0x92220007,
35 Config12 = 0x92220008,
36 };
37
38 void SetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { 25 void SetPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
39 IPC::RequestParser rp{ctx}; 26 IPC::RequestParser rp{ctx};
40 27
41 auto mode = static_cast<PerformanceMode>(rp.Pop<u32>()); 28 const auto mode = rp.PopEnum<PerformanceMode>();
42 u32 config = rp.Pop<u32>(); 29 const auto config = rp.PopEnum<PerformanceConfiguration>();
43 LOG_WARNING(Service_APM, "(STUBBED) called mode={} config={}", static_cast<u32>(mode), 30 LOG_DEBUG(Service_APM, "called mode={} config={}", static_cast<u32>(mode),
44 config); 31 static_cast<u32>(config));
32
33 controller.SetPerformanceConfiguration(mode, config);
45 34
46 IPC::ResponseBuilder rb{ctx, 2}; 35 IPC::ResponseBuilder rb{ctx, 2};
47 rb.Push(RESULT_SUCCESS); 36 rb.Push(RESULT_SUCCESS);
@@ -50,20 +39,23 @@ private:
50 void GetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { 39 void GetPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
51 IPC::RequestParser rp{ctx}; 40 IPC::RequestParser rp{ctx};
52 41
53 auto mode = static_cast<PerformanceMode>(rp.Pop<u32>()); 42 const auto mode = rp.PopEnum<PerformanceMode>();
54 LOG_WARNING(Service_APM, "(STUBBED) called mode={}", static_cast<u32>(mode)); 43 LOG_DEBUG(Service_APM, "called mode={}", static_cast<u32>(mode));
55 44
56 IPC::ResponseBuilder rb{ctx, 3}; 45 IPC::ResponseBuilder rb{ctx, 3};
57 rb.Push(RESULT_SUCCESS); 46 rb.Push(RESULT_SUCCESS);
58 rb.Push<u32>(static_cast<u32>(PerformanceConfiguration::Config1)); 47 rb.PushEnum(controller.GetCurrentPerformanceConfiguration(mode));
59 } 48 }
49
50 Controller& controller;
60}; 51};
61 52
62APM::APM(std::shared_ptr<Module> apm, const char* name) 53APM::APM(std::shared_ptr<Module> apm, Controller& controller, const char* name)
63 : ServiceFramework(name), apm(std::move(apm)) { 54 : ServiceFramework(name), apm(std::move(apm)), controller(controller) {
64 static const FunctionInfo functions[] = { 55 static const FunctionInfo functions[] = {
65 {0, &APM::OpenSession, "OpenSession"}, 56 {0, &APM::OpenSession, "OpenSession"},
66 {1, nullptr, "GetPerformanceMode"}, 57 {1, &APM::GetPerformanceMode, "GetPerformanceMode"},
58 {6, nullptr, "IsCpuOverclockEnabled"},
67 }; 59 };
68 RegisterHandlers(functions); 60 RegisterHandlers(functions);
69} 61}
@@ -75,10 +67,17 @@ void APM::OpenSession(Kernel::HLERequestContext& ctx) {
75 67
76 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 68 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
77 rb.Push(RESULT_SUCCESS); 69 rb.Push(RESULT_SUCCESS);
78 rb.PushIpcInterface<ISession>(); 70 rb.PushIpcInterface<ISession>(controller);
71}
72
73void APM::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
74 LOG_DEBUG(Service_APM, "called");
75
76 IPC::ResponseBuilder rb{ctx, 2};
77 rb.PushEnum(controller.GetCurrentPerformanceMode());
79} 78}
80 79
81APM_Sys::APM_Sys() : ServiceFramework{"apm:sys"} { 80APM_Sys::APM_Sys(Controller& controller) : ServiceFramework{"apm:sys"}, controller(controller) {
82 // clang-format off 81 // clang-format off
83 static const FunctionInfo functions[] = { 82 static const FunctionInfo functions[] = {
84 {0, nullptr, "RequestPerformanceMode"}, 83 {0, nullptr, "RequestPerformanceMode"},
@@ -87,8 +86,8 @@ APM_Sys::APM_Sys() : ServiceFramework{"apm:sys"} {
87 {3, nullptr, "GetLastThrottlingState"}, 86 {3, nullptr, "GetLastThrottlingState"},
88 {4, nullptr, "ClearLastThrottlingState"}, 87 {4, nullptr, "ClearLastThrottlingState"},
89 {5, nullptr, "LoadAndApplySettings"}, 88 {5, nullptr, "LoadAndApplySettings"},
90 {6, nullptr, "SetCpuBoostMode"}, 89 {6, &APM_Sys::SetCpuBoostMode, "SetCpuBoostMode"},
91 {7, nullptr, "GetCurrentPerformanceConfiguration"}, 90 {7, &APM_Sys::GetCurrentPerformanceConfiguration, "GetCurrentPerformanceConfiguration"},
92 }; 91 };
93 // clang-format on 92 // clang-format on
94 93
@@ -102,7 +101,28 @@ void APM_Sys::GetPerformanceEvent(Kernel::HLERequestContext& ctx) {
102 101
103 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 102 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
104 rb.Push(RESULT_SUCCESS); 103 rb.Push(RESULT_SUCCESS);
105 rb.PushIpcInterface<ISession>(); 104 rb.PushIpcInterface<ISession>(controller);
105}
106
107void APM_Sys::SetCpuBoostMode(Kernel::HLERequestContext& ctx) {
108 IPC::RequestParser rp{ctx};
109 const auto mode = rp.PopEnum<CpuBoostMode>();
110
111 LOG_DEBUG(Service_APM, "called, mode={:08X}", static_cast<u32>(mode));
112
113 controller.SetFromCpuBoostMode(mode);
114
115 IPC::ResponseBuilder rb{ctx, 2};
116 rb.Push(RESULT_SUCCESS);
117}
118
119void APM_Sys::GetCurrentPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
120 LOG_DEBUG(Service_APM, "called");
121
122 IPC::ResponseBuilder rb{ctx, 3};
123 rb.Push(RESULT_SUCCESS);
124 rb.PushEnum(
125 controller.GetCurrentPerformanceConfiguration(controller.GetCurrentPerformanceMode()));
106} 126}
107 127
108} // namespace Service::APM 128} // namespace Service::APM
diff --git a/src/core/hle/service/apm/interface.h b/src/core/hle/service/apm/interface.h
index 773541aa4..de1b89437 100644
--- a/src/core/hle/service/apm/interface.h
+++ b/src/core/hle/service/apm/interface.h
@@ -8,24 +8,34 @@
8 8
9namespace Service::APM { 9namespace Service::APM {
10 10
11class Controller;
12class Module;
13
11class APM final : public ServiceFramework<APM> { 14class APM final : public ServiceFramework<APM> {
12public: 15public:
13 explicit APM(std::shared_ptr<Module> apm, const char* name); 16 explicit APM(std::shared_ptr<Module> apm, Controller& controller, const char* name);
14 ~APM() override; 17 ~APM() override;
15 18
16private: 19private:
17 void OpenSession(Kernel::HLERequestContext& ctx); 20 void OpenSession(Kernel::HLERequestContext& ctx);
21 void GetPerformanceMode(Kernel::HLERequestContext& ctx);
18 22
19 std::shared_ptr<Module> apm; 23 std::shared_ptr<Module> apm;
24 Controller& controller;
20}; 25};
21 26
22class APM_Sys final : public ServiceFramework<APM_Sys> { 27class APM_Sys final : public ServiceFramework<APM_Sys> {
23public: 28public:
24 explicit APM_Sys(); 29 explicit APM_Sys(Controller& controller);
25 ~APM_Sys() override; 30 ~APM_Sys() override;
26 31
32 void SetCpuBoostMode(Kernel::HLERequestContext& ctx);
33
27private: 34private:
28 void GetPerformanceEvent(Kernel::HLERequestContext& ctx); 35 void GetPerformanceEvent(Kernel::HLERequestContext& ctx);
36 void GetCurrentPerformanceConfiguration(Kernel::HLERequestContext& ctx);
37
38 Controller& controller;
29}; 39};
30 40
31} // namespace Service::APM 41} // namespace Service::APM
diff --git a/src/core/hle/service/audio/audio.cpp b/src/core/hle/service/audio/audio.cpp
index 128df7db5..1781bec83 100644
--- a/src/core/hle/service/audio/audio.cpp
+++ b/src/core/hle/service/audio/audio.cpp
@@ -19,16 +19,16 @@
19 19
20namespace Service::Audio { 20namespace Service::Audio {
21 21
22void InstallInterfaces(SM::ServiceManager& service_manager) { 22void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
23 std::make_shared<AudCtl>()->InstallAsService(service_manager); 23 std::make_shared<AudCtl>()->InstallAsService(service_manager);
24 std::make_shared<AudOutA>()->InstallAsService(service_manager); 24 std::make_shared<AudOutA>()->InstallAsService(service_manager);
25 std::make_shared<AudOutU>()->InstallAsService(service_manager); 25 std::make_shared<AudOutU>(system)->InstallAsService(service_manager);
26 std::make_shared<AudInA>()->InstallAsService(service_manager); 26 std::make_shared<AudInA>()->InstallAsService(service_manager);
27 std::make_shared<AudInU>()->InstallAsService(service_manager); 27 std::make_shared<AudInU>()->InstallAsService(service_manager);
28 std::make_shared<AudRecA>()->InstallAsService(service_manager); 28 std::make_shared<AudRecA>()->InstallAsService(service_manager);
29 std::make_shared<AudRecU>()->InstallAsService(service_manager); 29 std::make_shared<AudRecU>()->InstallAsService(service_manager);
30 std::make_shared<AudRenA>()->InstallAsService(service_manager); 30 std::make_shared<AudRenA>()->InstallAsService(service_manager);
31 std::make_shared<AudRenU>()->InstallAsService(service_manager); 31 std::make_shared<AudRenU>(system)->InstallAsService(service_manager);
32 std::make_shared<CodecCtl>()->InstallAsService(service_manager); 32 std::make_shared<CodecCtl>()->InstallAsService(service_manager);
33 std::make_shared<HwOpus>()->InstallAsService(service_manager); 33 std::make_shared<HwOpus>()->InstallAsService(service_manager);
34 34
diff --git a/src/core/hle/service/audio/audio.h b/src/core/hle/service/audio/audio.h
index f5bd3bf5f..b6d13912e 100644
--- a/src/core/hle/service/audio/audio.h
+++ b/src/core/hle/service/audio/audio.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7namespace Core {
8class System;
9}
10
7namespace Service::SM { 11namespace Service::SM {
8class ServiceManager; 12class ServiceManager;
9} 13}
@@ -11,6 +15,6 @@ class ServiceManager;
11namespace Service::Audio { 15namespace Service::Audio {
12 16
13/// Registers all Audio services with the specified service manager. 17/// Registers all Audio services with the specified service manager.
14void InstallInterfaces(SM::ServiceManager& service_manager); 18void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system);
15 19
16} // namespace Service::Audio 20} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 7db6eb08d..fb84a8f13 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -40,8 +40,8 @@ enum class AudioState : u32 {
40 40
41class IAudioOut final : public ServiceFramework<IAudioOut> { 41class IAudioOut final : public ServiceFramework<IAudioOut> {
42public: 42public:
43 IAudioOut(AudoutParams audio_params, AudioCore::AudioOut& audio_core, std::string&& device_name, 43 IAudioOut(Core::System& system, AudoutParams audio_params, AudioCore::AudioOut& audio_core,
44 std::string&& unique_name) 44 std::string&& device_name, std::string&& unique_name)
45 : ServiceFramework("IAudioOut"), audio_core(audio_core), 45 : ServiceFramework("IAudioOut"), audio_core(audio_core),
46 device_name(std::move(device_name)), audio_params(audio_params) { 46 device_name(std::move(device_name)), audio_params(audio_params) {
47 // clang-format off 47 // clang-format off
@@ -65,7 +65,6 @@ public:
65 RegisterHandlers(functions); 65 RegisterHandlers(functions);
66 66
67 // This is the event handle used to check if the audio buffer was released 67 // This is the event handle used to check if the audio buffer was released
68 auto& system = Core::System::GetInstance();
69 buffer_event = Kernel::WritableEvent::CreateEventPair( 68 buffer_event = Kernel::WritableEvent::CreateEventPair(
70 system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased"); 69 system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased");
71 70
@@ -212,6 +211,22 @@ private:
212 Kernel::EventPair buffer_event; 211 Kernel::EventPair buffer_event;
213}; 212};
214 213
214AudOutU::AudOutU(Core::System& system_) : ServiceFramework("audout:u"), system{system_} {
215 // clang-format off
216 static const FunctionInfo functions[] = {
217 {0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"},
218 {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"},
219 {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"},
220 {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"},
221 };
222 // clang-format on
223
224 RegisterHandlers(functions);
225 audio_core = std::make_unique<AudioCore::AudioOut>();
226}
227
228AudOutU::~AudOutU() = default;
229
215void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) { 230void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) {
216 LOG_DEBUG(Service_Audio, "called"); 231 LOG_DEBUG(Service_Audio, "called");
217 232
@@ -248,7 +263,7 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) {
248 263
249 std::string unique_name{fmt::format("{}-{}", device_name, audio_out_interfaces.size())}; 264 std::string unique_name{fmt::format("{}-{}", device_name, audio_out_interfaces.size())};
250 auto audio_out_interface = std::make_shared<IAudioOut>( 265 auto audio_out_interface = std::make_shared<IAudioOut>(
251 params, *audio_core, std::move(device_name), std::move(unique_name)); 266 system, params, *audio_core, std::move(device_name), std::move(unique_name));
252 267
253 IPC::ResponseBuilder rb{ctx, 6, 0, 1}; 268 IPC::ResponseBuilder rb{ctx, 6, 0, 1};
254 rb.Push(RESULT_SUCCESS); 269 rb.Push(RESULT_SUCCESS);
@@ -256,20 +271,9 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) {
256 rb.Push<u32>(params.channel_count); 271 rb.Push<u32>(params.channel_count);
257 rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16)); 272 rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16));
258 rb.Push<u32>(static_cast<u32>(AudioState::Stopped)); 273 rb.Push<u32>(static_cast<u32>(AudioState::Stopped));
259 rb.PushIpcInterface<Audio::IAudioOut>(audio_out_interface); 274 rb.PushIpcInterface<IAudioOut>(audio_out_interface);
260 275
261 audio_out_interfaces.push_back(std::move(audio_out_interface)); 276 audio_out_interfaces.push_back(std::move(audio_out_interface));
262} 277}
263 278
264AudOutU::AudOutU() : ServiceFramework("audout:u") {
265 static const FunctionInfo functions[] = {{0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"},
266 {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"},
267 {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"},
268 {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"}};
269 RegisterHandlers(functions);
270 audio_core = std::make_unique<AudioCore::AudioOut>();
271}
272
273AudOutU::~AudOutU() = default;
274
275} // namespace Service::Audio 279} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/audout_u.h b/src/core/hle/service/audio/audout_u.h
index aed4c43b2..c9f532ccd 100644
--- a/src/core/hle/service/audio/audout_u.h
+++ b/src/core/hle/service/audio/audout_u.h
@@ -11,6 +11,10 @@ namespace AudioCore {
11class AudioOut; 11class AudioOut;
12} 12}
13 13
14namespace Core {
15class System;
16}
17
14namespace Kernel { 18namespace Kernel {
15class HLERequestContext; 19class HLERequestContext;
16} 20}
@@ -21,15 +25,17 @@ class IAudioOut;
21 25
22class AudOutU final : public ServiceFramework<AudOutU> { 26class AudOutU final : public ServiceFramework<AudOutU> {
23public: 27public:
24 AudOutU(); 28 explicit AudOutU(Core::System& system_);
25 ~AudOutU() override; 29 ~AudOutU() override;
26 30
27private: 31private:
32 void ListAudioOutsImpl(Kernel::HLERequestContext& ctx);
33 void OpenAudioOutImpl(Kernel::HLERequestContext& ctx);
34
28 std::vector<std::shared_ptr<IAudioOut>> audio_out_interfaces; 35 std::vector<std::shared_ptr<IAudioOut>> audio_out_interfaces;
29 std::unique_ptr<AudioCore::AudioOut> audio_core; 36 std::unique_ptr<AudioCore::AudioOut> audio_core;
30 37
31 void ListAudioOutsImpl(Kernel::HLERequestContext& ctx); 38 Core::System& system;
32 void OpenAudioOutImpl(Kernel::HLERequestContext& ctx);
33}; 39};
34 40
35} // namespace Service::Audio 41} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 75db0c2dc..f162249ed 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -5,6 +5,7 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <memory> 7#include <memory>
8#include <string_view>
8 9
9#include "audio_core/audio_renderer.h" 10#include "audio_core/audio_renderer.h"
10#include "common/alignment.h" 11#include "common/alignment.h"
@@ -25,7 +26,8 @@ namespace Service::Audio {
25 26
26class IAudioRenderer final : public ServiceFramework<IAudioRenderer> { 27class IAudioRenderer final : public ServiceFramework<IAudioRenderer> {
27public: 28public:
28 explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params) 29 explicit IAudioRenderer(Core::System& system, AudioCore::AudioRendererParameter audren_params,
30 const std::size_t instance_number)
29 : ServiceFramework("IAudioRenderer") { 31 : ServiceFramework("IAudioRenderer") {
30 // clang-format off 32 // clang-format off
31 static const FunctionInfo functions[] = { 33 static const FunctionInfo functions[] = {
@@ -45,11 +47,10 @@ public:
45 // clang-format on 47 // clang-format on
46 RegisterHandlers(functions); 48 RegisterHandlers(functions);
47 49
48 auto& system = Core::System::GetInstance();
49 system_event = Kernel::WritableEvent::CreateEventPair( 50 system_event = Kernel::WritableEvent::CreateEventPair(
50 system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent"); 51 system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent");
51 renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params, 52 renderer = std::make_unique<AudioCore::AudioRenderer>(
52 system_event.writable); 53 system.CoreTiming(), audren_params, system_event.writable, instance_number);
53 } 54 }
54 55
55private: 56private:
@@ -159,40 +160,81 @@ private:
159 160
160class IAudioDevice final : public ServiceFramework<IAudioDevice> { 161class IAudioDevice final : public ServiceFramework<IAudioDevice> {
161public: 162public:
162 IAudioDevice() : ServiceFramework("IAudioDevice") { 163 explicit IAudioDevice(Core::System& system, u32_le revision_num)
164 : ServiceFramework("IAudioDevice"), revision{revision_num} {
163 static const FunctionInfo functions[] = { 165 static const FunctionInfo functions[] = {
164 {0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"}, 166 {0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"},
165 {1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"}, 167 {1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"},
166 {2, nullptr, "GetAudioDeviceOutputVolume"}, 168 {2, &IAudioDevice::GetAudioDeviceOutputVolume, "GetAudioDeviceOutputVolume"},
167 {3, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceName"}, 169 {3, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceName"},
168 {4, &IAudioDevice::QueryAudioDeviceSystemEvent, "QueryAudioDeviceSystemEvent"}, 170 {4, &IAudioDevice::QueryAudioDeviceSystemEvent, "QueryAudioDeviceSystemEvent"},
169 {5, &IAudioDevice::GetActiveChannelCount, "GetActiveChannelCount"}, 171 {5, &IAudioDevice::GetActiveChannelCount, "GetActiveChannelCount"},
170 {6, &IAudioDevice::ListAudioDeviceName, 172 {6, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceNameAuto"},
171 "ListAudioDeviceNameAuto"}, // TODO(ogniK): Confirm if autos are identical to non auto
172 {7, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolumeAuto"}, 173 {7, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolumeAuto"},
173 {8, nullptr, "GetAudioDeviceOutputVolumeAuto"}, 174 {8, &IAudioDevice::GetAudioDeviceOutputVolume, "GetAudioDeviceOutputVolumeAuto"},
174 {10, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceNameAuto"}, 175 {10, &IAudioDevice::GetActiveAudioDeviceName, "GetActiveAudioDeviceNameAuto"},
175 {11, nullptr, "QueryAudioDeviceInputEvent"}, 176 {11, &IAudioDevice::QueryAudioDeviceInputEvent, "QueryAudioDeviceInputEvent"},
176 {12, nullptr, "QueryAudioDeviceOutputEvent"}, 177 {12, &IAudioDevice::QueryAudioDeviceOutputEvent, "QueryAudioDeviceOutputEvent"},
177 {13, nullptr, "GetAudioSystemMasterVolumeSetting"}, 178 {13, nullptr, "GetAudioSystemMasterVolumeSetting"},
178 }; 179 };
179 RegisterHandlers(functions); 180 RegisterHandlers(functions);
180 181
181 auto& kernel = Core::System::GetInstance().Kernel(); 182 auto& kernel = system.Kernel();
182 buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, 183 buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
183 "IAudioOutBufferReleasedEvent"); 184 "IAudioOutBufferReleasedEvent");
185
186 // Should be similar to audio_output_device_switch_event
187 audio_input_device_switch_event = Kernel::WritableEvent::CreateEventPair(
188 kernel, Kernel::ResetType::Automatic, "IAudioDevice:AudioInputDeviceSwitchedEvent");
189
190 // Should only be signalled when an audio output device has been changed, example: speaker
191 // to headset
192 audio_output_device_switch_event = Kernel::WritableEvent::CreateEventPair(
193 kernel, Kernel::ResetType::Automatic, "IAudioDevice:AudioOutputDeviceSwitchedEvent");
184 } 194 }
185 195
186private: 196private:
197 using AudioDeviceName = std::array<char, 256>;
198 static constexpr std::array<std::string_view, 4> audio_device_names{{
199 "AudioStereoJackOutput",
200 "AudioBuiltInSpeakerOutput",
201 "AudioTvOutput",
202 "AudioUsbDeviceOutput",
203 }};
204 enum class DeviceType {
205 AHUBHeadphones,
206 AHUBSpeakers,
207 HDA,
208 USBOutput,
209 };
210
187 void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { 211 void ListAudioDeviceName(Kernel::HLERequestContext& ctx) {
188 LOG_WARNING(Service_Audio, "(STUBBED) called"); 212 LOG_DEBUG(Service_Audio, "called");
213
214 const bool usb_output_supported =
215 IsFeatureSupported(AudioFeatures::AudioUSBDeviceOutput, revision);
216 const std::size_t count = ctx.GetWriteBufferSize() / sizeof(AudioDeviceName);
189 217
190 constexpr std::array<char, 15> audio_interface{{"AudioInterface"}}; 218 std::vector<AudioDeviceName> name_buffer;
191 ctx.WriteBuffer(audio_interface); 219 name_buffer.reserve(audio_device_names.size());
220
221 for (std::size_t i = 0; i < count && i < audio_device_names.size(); i++) {
222 const auto type = static_cast<DeviceType>(i);
223
224 if (!usb_output_supported && type == DeviceType::USBOutput) {
225 continue;
226 }
227
228 const auto& device_name = audio_device_names[i];
229 auto& entry = name_buffer.emplace_back();
230 device_name.copy(entry.data(), device_name.size());
231 }
232
233 ctx.WriteBuffer(name_buffer);
192 234
193 IPC::ResponseBuilder rb{ctx, 3}; 235 IPC::ResponseBuilder rb{ctx, 3};
194 rb.Push(RESULT_SUCCESS); 236 rb.Push(RESULT_SUCCESS);
195 rb.Push<u32>(1); 237 rb.Push(static_cast<u32>(name_buffer.size()));
196 } 238 }
197 239
198 void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { 240 void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) {
@@ -208,15 +250,32 @@ private:
208 rb.Push(RESULT_SUCCESS); 250 rb.Push(RESULT_SUCCESS);
209 } 251 }
210 252
253 void GetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) {
254 IPC::RequestParser rp{ctx};
255
256 const auto device_name_buffer = ctx.ReadBuffer();
257 const std::string name = Common::StringFromBuffer(device_name_buffer);
258
259 LOG_WARNING(Service_Audio, "(STUBBED) called. name={}", name);
260
261 IPC::ResponseBuilder rb{ctx, 3};
262 rb.Push(RESULT_SUCCESS);
263 rb.Push(1.0f);
264 }
265
211 void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { 266 void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) {
212 LOG_WARNING(Service_Audio, "(STUBBED) called"); 267 LOG_WARNING(Service_Audio, "(STUBBED) called");
213 268
214 constexpr std::array<char, 12> audio_interface{{"AudioDevice"}}; 269 // Currently set to always be TV audio output.
215 ctx.WriteBuffer(audio_interface); 270 const auto& device_name = audio_device_names[2];
216 271
217 IPC::ResponseBuilder rb{ctx, 3}; 272 AudioDeviceName out_device_name{};
273 device_name.copy(out_device_name.data(), device_name.size());
274
275 ctx.WriteBuffer(out_device_name);
276
277 IPC::ResponseBuilder rb{ctx, 2};
218 rb.Push(RESULT_SUCCESS); 278 rb.Push(RESULT_SUCCESS);
219 rb.Push<u32>(1);
220 } 279 }
221 280
222 void QueryAudioDeviceSystemEvent(Kernel::HLERequestContext& ctx) { 281 void QueryAudioDeviceSystemEvent(Kernel::HLERequestContext& ctx) {
@@ -237,11 +296,31 @@ private:
237 rb.Push<u32>(1); 296 rb.Push<u32>(1);
238 } 297 }
239 298
299 // Should be similar to QueryAudioDeviceOutputEvent
300 void QueryAudioDeviceInputEvent(Kernel::HLERequestContext& ctx) {
301 LOG_WARNING(Service_Audio, "(STUBBED) called");
302
303 IPC::ResponseBuilder rb{ctx, 2, 1};
304 rb.Push(RESULT_SUCCESS);
305 rb.PushCopyObjects(audio_input_device_switch_event.readable);
306 }
307
308 void QueryAudioDeviceOutputEvent(Kernel::HLERequestContext& ctx) {
309 LOG_DEBUG(Service_Audio, "called");
310
311 IPC::ResponseBuilder rb{ctx, 2, 1};
312 rb.Push(RESULT_SUCCESS);
313 rb.PushCopyObjects(audio_output_device_switch_event.readable);
314 }
315
316 u32_le revision = 0;
240 Kernel::EventPair buffer_event; 317 Kernel::EventPair buffer_event;
318 Kernel::EventPair audio_input_device_switch_event;
319 Kernel::EventPair audio_output_device_switch_event;
241 320
242}; // namespace Audio 321}; // namespace Audio
243 322
244AudRenU::AudRenU() : ServiceFramework("audren:u") { 323AudRenU::AudRenU(Core::System& system_) : ServiceFramework("audren:u"), system{system_} {
245 // clang-format off 324 // clang-format off
246 static const FunctionInfo functions[] = { 325 static const FunctionInfo functions[] = {
247 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, 326 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
@@ -314,7 +393,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
314 }; 393 };
315 394
316 // Calculates the portion of the size related to the mix data (and the sorting thereof). 395 // Calculates the portion of the size related to the mix data (and the sorting thereof).
317 const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) { 396 const auto calculate_mix_info_size = [](const AudioCore::AudioRendererParameter& params) {
318 // The size of the mixing info data structure. 397 // The size of the mixing info data structure.
319 constexpr u64 mix_info_size = 0x940; 398 constexpr u64 mix_info_size = 0x940;
320 399
@@ -386,7 +465,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
386 465
387 // Calculates the part of the size related to the splitter context. 466 // Calculates the part of the size related to the splitter context.
388 const auto calculate_splitter_context_size = 467 const auto calculate_splitter_context_size =
389 [this](const AudioCore::AudioRendererParameter& params) -> u64 { 468 [](const AudioCore::AudioRendererParameter& params) -> u64 {
390 if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 469 if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
391 return 0; 470 return 0;
392 } 471 }
@@ -433,7 +512,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
433 }; 512 };
434 513
435 // Calculates the part of the size related to performance statistics. 514 // Calculates the part of the size related to performance statistics.
436 const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) { 515 const auto calculate_perf_size = [](const AudioCore::AudioRendererParameter& params) {
437 // Extra size value appended to the end of the calculation. 516 // Extra size value appended to the end of the calculation.
438 constexpr u64 appended = 128; 517 constexpr u64 appended = 128;
439 518
@@ -460,78 +539,76 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
460 }; 539 };
461 540
462 // Calculates the part of the size that relates to the audio command buffer. 541 // Calculates the part of the size that relates to the audio command buffer.
463 const auto calculate_command_buffer_size = 542 const auto calculate_command_buffer_size = [](const AudioCore::AudioRendererParameter& params) {
464 [this](const AudioCore::AudioRendererParameter& params) { 543 constexpr u64 alignment = (buffer_alignment_size - 1) * 2;
465 constexpr u64 alignment = (buffer_alignment_size - 1) * 2;
466 544
467 if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) { 545 if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) {
468 constexpr u64 command_buffer_size = 0x18000; 546 constexpr u64 command_buffer_size = 0x18000;
469 547
470 return command_buffer_size + alignment; 548 return command_buffer_size + alignment;
471 } 549 }
472 550
473 // When the variadic command buffer is supported, this means 551 // When the variadic command buffer is supported, this means
474 // the command generator for the audio renderer can issue commands 552 // the command generator for the audio renderer can issue commands
475 // that are (as one would expect), variable in size. So what we need to do 553 // that are (as one would expect), variable in size. So what we need to do
476 // is determine the maximum possible size for a few command data structures 554 // is determine the maximum possible size for a few command data structures
477 // then multiply them by the amount of present commands indicated by the given 555 // then multiply them by the amount of present commands indicated by the given
478 // respective audio parameters. 556 // respective audio parameters.
479 557
480 constexpr u64 max_biquad_filters = 2; 558 constexpr u64 max_biquad_filters = 2;
481 constexpr u64 max_mix_buffers = 24; 559 constexpr u64 max_mix_buffers = 24;
482 560
483 constexpr u64 biquad_filter_command_size = 0x2C; 561 constexpr u64 biquad_filter_command_size = 0x2C;
484 562
485 constexpr u64 depop_mix_command_size = 0x24; 563 constexpr u64 depop_mix_command_size = 0x24;
486 constexpr u64 depop_setup_command_size = 0x50; 564 constexpr u64 depop_setup_command_size = 0x50;
487 565
488 constexpr u64 effect_command_max_size = 0x540; 566 constexpr u64 effect_command_max_size = 0x540;
489 567
490 constexpr u64 mix_command_size = 0x1C; 568 constexpr u64 mix_command_size = 0x1C;
491 constexpr u64 mix_ramp_command_size = 0x24; 569 constexpr u64 mix_ramp_command_size = 0x24;
492 constexpr u64 mix_ramp_grouped_command_size = 0x13C; 570 constexpr u64 mix_ramp_grouped_command_size = 0x13C;
493 571
494 constexpr u64 perf_command_size = 0x28; 572 constexpr u64 perf_command_size = 0x28;
495 573
496 constexpr u64 sink_command_size = 0x130; 574 constexpr u64 sink_command_size = 0x130;
497 575
498 constexpr u64 submix_command_max_size = 576 constexpr u64 submix_command_max_size =
499 depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers; 577 depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers;
500 578
501 constexpr u64 volume_command_size = 0x1C; 579 constexpr u64 volume_command_size = 0x1C;
502 constexpr u64 volume_ramp_command_size = 0x20; 580 constexpr u64 volume_ramp_command_size = 0x20;
503 581
504 constexpr u64 voice_biquad_filter_command_size = 582 constexpr u64 voice_biquad_filter_command_size =
505 biquad_filter_command_size * max_biquad_filters; 583 biquad_filter_command_size * max_biquad_filters;
506 constexpr u64 voice_data_command_size = 0x9C; 584 constexpr u64 voice_data_command_size = 0x9C;
507 const u64 voice_command_max_size = 585 const u64 voice_command_max_size =
508 (params.splitter_count * depop_setup_command_size) + 586 (params.splitter_count * depop_setup_command_size) +
509 (voice_data_command_size + voice_biquad_filter_command_size + 587 (voice_data_command_size + voice_biquad_filter_command_size + volume_ramp_command_size +
510 volume_ramp_command_size + mix_ramp_grouped_command_size); 588 mix_ramp_grouped_command_size);
511 589
512 // Now calculate the individual elements that comprise the size and add them together. 590 // Now calculate the individual elements that comprise the size and add them together.
513 const u64 effect_commands_size = params.effect_count * effect_command_max_size; 591 const u64 effect_commands_size = params.effect_count * effect_command_max_size;
514 592
515 const u64 final_mix_commands_size = 593 const u64 final_mix_commands_size =
516 depop_mix_command_size + volume_command_size * max_mix_buffers; 594 depop_mix_command_size + volume_command_size * max_mix_buffers;
517 595
518 const u64 perf_commands_size = 596 const u64 perf_commands_size =
519 perf_command_size * 597 perf_command_size * (CalculateNumPerformanceEntries(params) + max_perf_detail_entries);
520 (CalculateNumPerformanceEntries(params) + max_perf_detail_entries);
521 598
522 const u64 sink_commands_size = params.sink_count * sink_command_size; 599 const u64 sink_commands_size = params.sink_count * sink_command_size;
523 600
524 const u64 splitter_commands_size = 601 const u64 splitter_commands_size =
525 params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size; 602 params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size;
526 603
527 const u64 submix_commands_size = params.submix_count * submix_command_max_size; 604 const u64 submix_commands_size = params.submix_count * submix_command_max_size;
528 605
529 const u64 voice_commands_size = params.voice_count * voice_command_max_size; 606 const u64 voice_commands_size = params.voice_count * voice_command_max_size;
530 607
531 return effect_commands_size + final_mix_commands_size + perf_commands_size + 608 return effect_commands_size + final_mix_commands_size + perf_commands_size +
532 sink_commands_size + splitter_commands_size + submix_commands_size + 609 sink_commands_size + splitter_commands_size + submix_commands_size +
533 voice_commands_size + alignment; 610 voice_commands_size + alignment;
534 }; 611 };
535 612
536 IPC::RequestParser rp{ctx}; 613 IPC::RequestParser rp{ctx};
537 const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); 614 const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
@@ -564,12 +641,16 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
564} 641}
565 642
566void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { 643void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
567 LOG_DEBUG(Service_Audio, "called"); 644 IPC::RequestParser rp{ctx};
645 const u64 aruid = rp.Pop<u64>();
568 646
569 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 647 LOG_DEBUG(Service_Audio, "called. aruid={:016X}", aruid);
570 648
649 // Revisionless variant of GetAudioDeviceServiceWithRevisionInfo that
650 // always assumes the initial release revision (REV1).
651 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
571 rb.Push(RESULT_SUCCESS); 652 rb.Push(RESULT_SUCCESS);
572 rb.PushIpcInterface<Audio::IAudioDevice>(); 653 rb.PushIpcInterface<IAudioDevice>(system, Common::MakeMagic('R', 'E', 'V', '1'));
573} 654}
574 655
575void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) { 656void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
@@ -579,13 +660,19 @@ void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
579} 660}
580 661
581void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { 662void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
582 LOG_WARNING(Service_Audio, "(STUBBED) called"); 663 struct Parameters {
664 u32 revision;
665 u64 aruid;
666 };
583 667
584 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 668 IPC::RequestParser rp{ctx};
669 const auto [revision, aruid] = rp.PopRaw<Parameters>();
670
671 LOG_DEBUG(Service_Audio, "called. revision={:08X}, aruid={:016X}", revision, aruid);
585 672
673 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
586 rb.Push(RESULT_SUCCESS); 674 rb.Push(RESULT_SUCCESS);
587 rb.PushIpcInterface<Audio::IAudioDevice>(); // TODO(ogniK): Figure out what is different 675 rb.PushIpcInterface<IAudioDevice>(system, revision);
588 // based on the current revision
589} 676}
590 677
591void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { 678void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
@@ -594,14 +681,16 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
594 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 681 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
595 682
596 rb.Push(RESULT_SUCCESS); 683 rb.Push(RESULT_SUCCESS);
597 rb.PushIpcInterface<IAudioRenderer>(params); 684 rb.PushIpcInterface<IAudioRenderer>(system, params, audren_instance_count++);
598} 685}
599 686
600bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { 687bool IsFeatureSupported(AudioFeatures feature, u32_le revision) {
601 // Byte swap 688 // Byte swap
602 const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0'); 689 const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0');
603 690
604 switch (feature) { 691 switch (feature) {
692 case AudioFeatures::AudioUSBDeviceOutput:
693 return version_num >= 4U;
605 case AudioFeatures::Splitter: 694 case AudioFeatures::Splitter:
606 return version_num >= 2U; 695 return version_num >= 2U;
607 case AudioFeatures::PerformanceMetricsVersion2: 696 case AudioFeatures::PerformanceMetricsVersion2:
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 1d3c8df61..4e0ccc792 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -6,6 +6,10 @@
6 6
7#include "core/hle/service/service.h" 7#include "core/hle/service/service.h"
8 8
9namespace Core {
10class System;
11}
12
9namespace Kernel { 13namespace Kernel {
10class HLERequestContext; 14class HLERequestContext;
11} 15}
@@ -14,7 +18,7 @@ namespace Service::Audio {
14 18
15class AudRenU final : public ServiceFramework<AudRenU> { 19class AudRenU final : public ServiceFramework<AudRenU> {
16public: 20public:
17 explicit AudRenU(); 21 explicit AudRenU(Core::System& system_);
18 ~AudRenU() override; 22 ~AudRenU() override;
19 23
20private: 24private:
@@ -26,13 +30,19 @@ private:
26 30
27 void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx); 31 void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
28 32
29 enum class AudioFeatures : u32 { 33 std::size_t audren_instance_count = 0;
30 Splitter, 34 Core::System& system;
31 PerformanceMetricsVersion2, 35};
32 VariadicCommandBuffer,
33 };
34 36
35 bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const; 37// Describes a particular audio feature that may be supported in a particular revision.
38enum class AudioFeatures : u32 {
39 AudioUSBDeviceOutput,
40 Splitter,
41 PerformanceMetricsVersion2,
42 VariadicCommandBuffer,
36}; 43};
37 44
45// Tests if a particular audio feature is supported with a given audio revision.
46bool IsFeatureSupported(AudioFeatures feature, u32_le revision);
47
38} // namespace Service::Audio 48} // namespace Service::Audio
diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp
index 6701cb913..af70d174d 100644
--- a/src/core/hle/service/es/es.cpp
+++ b/src/core/hle/service/es/es.cpp
@@ -2,32 +2,37 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/crypto/key_manager.h"
6#include "core/hle/ipc_helpers.h"
5#include "core/hle/service/service.h" 7#include "core/hle/service/service.h"
6 8
7namespace Service::ES { 9namespace Service::ES {
8 10
11constexpr ResultCode ERROR_INVALID_ARGUMENT{ErrorModule::ETicket, 2};
12constexpr ResultCode ERROR_INVALID_RIGHTS_ID{ErrorModule::ETicket, 3};
13
9class ETicket final : public ServiceFramework<ETicket> { 14class ETicket final : public ServiceFramework<ETicket> {
10public: 15public:
11 explicit ETicket() : ServiceFramework{"es"} { 16 explicit ETicket() : ServiceFramework{"es"} {
12 // clang-format off 17 // clang-format off
13 static const FunctionInfo functions[] = { 18 static const FunctionInfo functions[] = {
14 {1, nullptr, "ImportTicket"}, 19 {1, &ETicket::ImportTicket, "ImportTicket"},
15 {2, nullptr, "ImportTicketCertificateSet"}, 20 {2, nullptr, "ImportTicketCertificateSet"},
16 {3, nullptr, "DeleteTicket"}, 21 {3, nullptr, "DeleteTicket"},
17 {4, nullptr, "DeletePersonalizedTicket"}, 22 {4, nullptr, "DeletePersonalizedTicket"},
18 {5, nullptr, "DeleteAllCommonTicket"}, 23 {5, nullptr, "DeleteAllCommonTicket"},
19 {6, nullptr, "DeleteAllPersonalizedTicket"}, 24 {6, nullptr, "DeleteAllPersonalizedTicket"},
20 {7, nullptr, "DeleteAllPersonalizedTicketEx"}, 25 {7, nullptr, "DeleteAllPersonalizedTicketEx"},
21 {8, nullptr, "GetTitleKey"}, 26 {8, &ETicket::GetTitleKey, "GetTitleKey"},
22 {9, nullptr, "CountCommonTicket"}, 27 {9, &ETicket::CountCommonTicket, "CountCommonTicket"},
23 {10, nullptr, "CountPersonalizedTicket"}, 28 {10, &ETicket::CountPersonalizedTicket, "CountPersonalizedTicket"},
24 {11, nullptr, "ListCommonTicket"}, 29 {11, &ETicket::ListCommonTicket, "ListCommonTicket"},
25 {12, nullptr, "ListPersonalizedTicket"}, 30 {12, &ETicket::ListPersonalizedTicket, "ListPersonalizedTicket"},
26 {13, nullptr, "ListMissingPersonalizedTicket"}, 31 {13, nullptr, "ListMissingPersonalizedTicket"},
27 {14, nullptr, "GetCommonTicketSize"}, 32 {14, &ETicket::GetCommonTicketSize, "GetCommonTicketSize"},
28 {15, nullptr, "GetPersonalizedTicketSize"}, 33 {15, &ETicket::GetPersonalizedTicketSize, "GetPersonalizedTicketSize"},
29 {16, nullptr, "GetCommonTicketData"}, 34 {16, &ETicket::GetCommonTicketData, "GetCommonTicketData"},
30 {17, nullptr, "GetPersonalizedTicketData"}, 35 {17, &ETicket::GetPersonalizedTicketData, "GetPersonalizedTicketData"},
31 {18, nullptr, "OwnTicket"}, 36 {18, nullptr, "OwnTicket"},
32 {19, nullptr, "GetTicketInfo"}, 37 {19, nullptr, "GetTicketInfo"},
33 {20, nullptr, "ListLightTicketInfo"}, 38 {20, nullptr, "ListLightTicketInfo"},
@@ -51,7 +56,212 @@ public:
51 }; 56 };
52 // clang-format on 57 // clang-format on
53 RegisterHandlers(functions); 58 RegisterHandlers(functions);
59
60 keys.PopulateTickets();
61 keys.SynthesizeTickets();
62 }
63
64private:
65 bool CheckRightsId(Kernel::HLERequestContext& ctx, const u128& rights_id) {
66 if (rights_id == u128{}) {
67 LOG_ERROR(Service_ETicket, "The rights ID was invalid!");
68 IPC::ResponseBuilder rb{ctx, 2};
69 rb.Push(ERROR_INVALID_RIGHTS_ID);
70 return false;
71 }
72
73 return true;
74 }
75
76 void ImportTicket(Kernel::HLERequestContext& ctx) {
77 IPC::RequestParser rp{ctx};
78 const auto ticket = ctx.ReadBuffer();
79 const auto cert = ctx.ReadBuffer(1);
80
81 if (ticket.size() < sizeof(Core::Crypto::Ticket)) {
82 LOG_ERROR(Service_ETicket, "The input buffer is not large enough!");
83 IPC::ResponseBuilder rb{ctx, 2};
84 rb.Push(ERROR_INVALID_ARGUMENT);
85 return;
86 }
87
88 Core::Crypto::Ticket raw{};
89 std::memcpy(&raw, ticket.data(), sizeof(Core::Crypto::Ticket));
90
91 if (!keys.AddTicketPersonalized(raw)) {
92 LOG_ERROR(Service_ETicket, "The ticket could not be imported!");
93 IPC::ResponseBuilder rb{ctx, 2};
94 rb.Push(ERROR_INVALID_ARGUMENT);
95 return;
96 }
97
98 IPC::ResponseBuilder rb{ctx, 2};
99 rb.Push(RESULT_SUCCESS);
100 }
101
102 void GetTitleKey(Kernel::HLERequestContext& ctx) {
103 IPC::RequestParser rp{ctx};
104 const auto rights_id = rp.PopRaw<u128>();
105
106 LOG_DEBUG(Service_ETicket, "called, rights_id={:016X}{:016X}", rights_id[1], rights_id[0]);
107
108 if (!CheckRightsId(ctx, rights_id))
109 return;
110
111 const auto key =
112 keys.GetKey(Core::Crypto::S128KeyType::Titlekey, rights_id[1], rights_id[0]);
113
114 if (key == Core::Crypto::Key128{}) {
115 LOG_ERROR(Service_ETicket,
116 "The titlekey doesn't exist in the KeyManager or the rights ID was invalid!");
117 IPC::ResponseBuilder rb{ctx, 2};
118 rb.Push(ERROR_INVALID_RIGHTS_ID);
119 return;
120 }
121
122 ctx.WriteBuffer(key.data(), key.size());
123
124 IPC::ResponseBuilder rb{ctx, 2};
125 rb.Push(RESULT_SUCCESS);
126 }
127
128 void CountCommonTicket(Kernel::HLERequestContext& ctx) {
129 LOG_DEBUG(Service_ETicket, "called");
130
131 const auto count = keys.GetCommonTickets().size();
132
133 IPC::ResponseBuilder rb{ctx, 3};
134 rb.Push(RESULT_SUCCESS);
135 rb.Push<u32>(count);
136 }
137
138 void CountPersonalizedTicket(Kernel::HLERequestContext& ctx) {
139 LOG_DEBUG(Service_ETicket, "called");
140
141 const auto count = keys.GetPersonalizedTickets().size();
142
143 IPC::ResponseBuilder rb{ctx, 3};
144 rb.Push(RESULT_SUCCESS);
145 rb.Push<u32>(count);
146 }
147
148 void ListCommonTicket(Kernel::HLERequestContext& ctx) {
149 u32 out_entries;
150 if (keys.GetCommonTickets().empty())
151 out_entries = 0;
152 else
153 out_entries = ctx.GetWriteBufferSize() / sizeof(u128);
154
155 LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries);
156
157 keys.PopulateTickets();
158 const auto tickets = keys.GetCommonTickets();
159 std::vector<u128> ids;
160 std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids),
161 [](const auto& pair) { return pair.first; });
162
163 out_entries = std::min<u32>(ids.size(), out_entries);
164 ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128));
165
166 IPC::ResponseBuilder rb{ctx, 3};
167 rb.Push(RESULT_SUCCESS);
168 rb.Push<u32>(out_entries);
54 } 169 }
170
171 void ListPersonalizedTicket(Kernel::HLERequestContext& ctx) {
172 u32 out_entries;
173 if (keys.GetPersonalizedTickets().empty())
174 out_entries = 0;
175 else
176 out_entries = ctx.GetWriteBufferSize() / sizeof(u128);
177
178 LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries);
179
180 keys.PopulateTickets();
181 const auto tickets = keys.GetPersonalizedTickets();
182 std::vector<u128> ids;
183 std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids),
184 [](const auto& pair) { return pair.first; });
185
186 out_entries = std::min<u32>(ids.size(), out_entries);
187 ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128));
188
189 IPC::ResponseBuilder rb{ctx, 3};
190 rb.Push(RESULT_SUCCESS);
191 rb.Push<u32>(out_entries);
192 }
193
194 void GetCommonTicketSize(Kernel::HLERequestContext& ctx) {
195 IPC::RequestParser rp{ctx};
196 const auto rights_id = rp.PopRaw<u128>();
197
198 LOG_DEBUG(Service_ETicket, "called, rights_id={:016X}{:016X}", rights_id[1], rights_id[0]);
199
200 if (!CheckRightsId(ctx, rights_id))
201 return;
202
203 const auto ticket = keys.GetCommonTickets().at(rights_id);
204
205 IPC::ResponseBuilder rb{ctx, 4};
206 rb.Push(RESULT_SUCCESS);
207 rb.Push<u64>(ticket.GetSize());
208 }
209
210 void GetPersonalizedTicketSize(Kernel::HLERequestContext& ctx) {
211 IPC::RequestParser rp{ctx};
212 const auto rights_id = rp.PopRaw<u128>();
213
214 LOG_DEBUG(Service_ETicket, "called, rights_id={:016X}{:016X}", rights_id[1], rights_id[0]);
215
216 if (!CheckRightsId(ctx, rights_id))
217 return;
218
219 const auto ticket = keys.GetPersonalizedTickets().at(rights_id);
220
221 IPC::ResponseBuilder rb{ctx, 4};
222 rb.Push(RESULT_SUCCESS);
223 rb.Push<u64>(ticket.GetSize());
224 }
225
226 void GetCommonTicketData(Kernel::HLERequestContext& ctx) {
227 IPC::RequestParser rp{ctx};
228 const auto rights_id = rp.PopRaw<u128>();
229
230 LOG_DEBUG(Service_ETicket, "called, rights_id={:016X}{:016X}", rights_id[1], rights_id[0]);
231
232 if (!CheckRightsId(ctx, rights_id))
233 return;
234
235 const auto ticket = keys.GetCommonTickets().at(rights_id);
236
237 const auto write_size = std::min<u64>(ticket.GetSize(), ctx.GetWriteBufferSize());
238 ctx.WriteBuffer(&ticket, write_size);
239
240 IPC::ResponseBuilder rb{ctx, 4};
241 rb.Push(RESULT_SUCCESS);
242 rb.Push<u64>(write_size);
243 }
244
245 void GetPersonalizedTicketData(Kernel::HLERequestContext& ctx) {
246 IPC::RequestParser rp{ctx};
247 const auto rights_id = rp.PopRaw<u128>();
248
249 LOG_DEBUG(Service_ETicket, "called, rights_id={:016X}{:016X}", rights_id[1], rights_id[0]);
250
251 if (!CheckRightsId(ctx, rights_id))
252 return;
253
254 const auto ticket = keys.GetPersonalizedTickets().at(rights_id);
255
256 const auto write_size = std::min<u64>(ticket.GetSize(), ctx.GetWriteBufferSize());
257 ctx.WriteBuffer(&ticket, write_size);
258
259 IPC::ResponseBuilder rb{ctx, 4};
260 rb.Push(RESULT_SUCCESS);
261 rb.Push<u64>(write_size);
262 }
263
264 Core::Crypto::KeyManager keys;
55}; 265};
56 266
57void InstallInterfaces(SM::ServiceManager& service_manager) { 267void InstallInterfaces(SM::ServiceManager& service_manager) {
diff --git a/src/core/hle/service/fatal/fatal.cpp b/src/core/hle/service/fatal/fatal.cpp
index fe49c2161..01fa06ad3 100644
--- a/src/core/hle/service/fatal/fatal.cpp
+++ b/src/core/hle/service/fatal/fatal.cpp
@@ -5,7 +5,7 @@
5#include <array> 5#include <array>
6#include <cstring> 6#include <cstring>
7#include <ctime> 7#include <ctime>
8#include <fmt/time.h> 8#include <fmt/chrono.h>
9#include "common/file_util.h" 9#include "common/file_util.h"
10#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "common/scm_rev.h" 11#include "common/scm_rev.h"
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 1ebfeb4bf..8ce110dd1 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -472,12 +472,12 @@ void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite) {
472 } 472 }
473} 473}
474 474
475void InstallInterfaces(SM::ServiceManager& service_manager, FileSys::VfsFilesystem& vfs) { 475void InstallInterfaces(Core::System& system) {
476 romfs_factory = nullptr; 476 romfs_factory = nullptr;
477 CreateFactories(vfs, false); 477 CreateFactories(*system.GetFilesystem(), false);
478 std::make_shared<FSP_LDR>()->InstallAsService(service_manager); 478 std::make_shared<FSP_LDR>()->InstallAsService(system.ServiceManager());
479 std::make_shared<FSP_PR>()->InstallAsService(service_manager); 479 std::make_shared<FSP_PR>()->InstallAsService(system.ServiceManager());
480 std::make_shared<FSP_SRV>()->InstallAsService(service_manager); 480 std::make_shared<FSP_SRV>(system.GetReporter())->InstallAsService(system.ServiceManager());
481} 481}
482 482
483} // namespace Service::FileSystem 483} // namespace Service::FileSystem
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index 6481f237c..3849dd89e 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -65,7 +65,7 @@ FileSys::VirtualDir GetModificationDumpRoot(u64 title_id);
65// above is called. 65// above is called.
66void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite = true); 66void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite = true);
67 67
68void InstallInterfaces(SM::ServiceManager& service_manager, FileSys::VfsFilesystem& vfs); 68void InstallInterfaces(Core::System& system);
69 69
70// A class that wraps a VfsDirectory with methods that return ResultVal and ResultCode instead of 70// A class that wraps a VfsDirectory with methods that return ResultVal and ResultCode instead of
71// pointers and booleans. This makes using a VfsDirectory with switch services much easier and 71// pointers and booleans. This makes using a VfsDirectory with switch services much easier and
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index e7df8fd98..d3cd46a9b 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -26,6 +26,7 @@
26#include "core/hle/kernel/process.h" 26#include "core/hle/kernel/process.h"
27#include "core/hle/service/filesystem/filesystem.h" 27#include "core/hle/service/filesystem/filesystem.h"
28#include "core/hle/service/filesystem/fsp_srv.h" 28#include "core/hle/service/filesystem/fsp_srv.h"
29#include "core/reporter.h"
29 30
30namespace Service::FileSystem { 31namespace Service::FileSystem {
31 32
@@ -613,7 +614,7 @@ private:
613 u64 next_entry_index = 0; 614 u64 next_entry_index = 0;
614}; 615};
615 616
616FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { 617FSP_SRV::FSP_SRV(const Core::Reporter& reporter) : ServiceFramework("fsp-srv"), reporter(reporter) {
617 // clang-format off 618 // clang-format off
618 static const FunctionInfo functions[] = { 619 static const FunctionInfo functions[] = {
619 {0, nullptr, "OpenFileSystem"}, 620 {0, nullptr, "OpenFileSystem"},
@@ -710,14 +711,14 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
710 {1001, nullptr, "SetSaveDataSize"}, 711 {1001, nullptr, "SetSaveDataSize"},
711 {1002, nullptr, "SetSaveDataRootPath"}, 712 {1002, nullptr, "SetSaveDataRootPath"},
712 {1003, nullptr, "DisableAutoSaveDataCreation"}, 713 {1003, nullptr, "DisableAutoSaveDataCreation"},
713 {1004, nullptr, "SetGlobalAccessLogMode"}, 714 {1004, &FSP_SRV::SetGlobalAccessLogMode, "SetGlobalAccessLogMode"},
714 {1005, &FSP_SRV::GetGlobalAccessLogMode, "GetGlobalAccessLogMode"}, 715 {1005, &FSP_SRV::GetGlobalAccessLogMode, "GetGlobalAccessLogMode"},
715 {1006, nullptr, "OutputAccessLogToSdCard"}, 716 {1006, &FSP_SRV::OutputAccessLogToSdCard, "OutputAccessLogToSdCard"},
716 {1007, nullptr, "RegisterUpdatePartition"}, 717 {1007, nullptr, "RegisterUpdatePartition"},
717 {1008, nullptr, "OpenRegisteredUpdatePartition"}, 718 {1008, nullptr, "OpenRegisteredUpdatePartition"},
718 {1009, nullptr, "GetAndClearMemoryReportInfo"}, 719 {1009, nullptr, "GetAndClearMemoryReportInfo"},
719 {1010, nullptr, "SetDataStorageRedirectTarget"}, 720 {1010, nullptr, "SetDataStorageRedirectTarget"},
720 {1011, nullptr, "OutputAccessLogToSdCard2"}, 721 {1011, &FSP_SRV::GetAccessLogVersionInfo, "GetAccessLogVersionInfo"},
721 {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, 722 {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"},
722 {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"}, 723 {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"},
723 {1200, nullptr, "OpenMultiCommitManager"}, 724 {1200, nullptr, "OpenMultiCommitManager"},
@@ -814,21 +815,22 @@ void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext&
814 rb.PushIpcInterface<ISaveDataInfoReader>(std::make_shared<ISaveDataInfoReader>(space)); 815 rb.PushIpcInterface<ISaveDataInfoReader>(std::make_shared<ISaveDataInfoReader>(space));
815} 816}
816 817
817void FSP_SRV::GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { 818void FSP_SRV::SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) {
818 LOG_WARNING(Service_FS, "(STUBBED) called"); 819 IPC::RequestParser rp{ctx};
820 log_mode = rp.PopEnum<LogMode>();
819 821
820 enum class LogMode : u32 { 822 LOG_DEBUG(Service_FS, "called, log_mode={:08X}", static_cast<u32>(log_mode));
821 Off, 823
822 Log, 824 IPC::ResponseBuilder rb{ctx, 2};
823 RedirectToSdCard, 825 rb.Push(RESULT_SUCCESS);
824 LogToSdCard = Log | RedirectToSdCard, 826}
825 }; 827
828void FSP_SRV::GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) {
829 LOG_DEBUG(Service_FS, "called");
826 830
827 // Given we always want to receive logging information,
828 // we always specify logging as enabled.
829 IPC::ResponseBuilder rb{ctx, 3}; 831 IPC::ResponseBuilder rb{ctx, 3};
830 rb.Push(RESULT_SUCCESS); 832 rb.Push(RESULT_SUCCESS);
831 rb.PushEnum(LogMode::Log); 833 rb.PushEnum(log_mode);
832} 834}
833 835
834void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) { 836void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) {
@@ -902,4 +904,26 @@ void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ct
902 rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND); 904 rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND);
903} 905}
904 906
907void FSP_SRV::OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx) {
908 const auto raw = ctx.ReadBuffer();
909 auto log = Common::StringFromFixedZeroTerminatedBuffer(
910 reinterpret_cast<const char*>(raw.data()), raw.size());
911
912 LOG_DEBUG(Service_FS, "called, log='{}'", log);
913
914 reporter.SaveFilesystemAccessReport(log_mode, std::move(log));
915
916 IPC::ResponseBuilder rb{ctx, 2};
917 rb.Push(RESULT_SUCCESS);
918}
919
920void FSP_SRV::GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx) {
921 LOG_DEBUG(Service_FS, "called");
922
923 IPC::ResponseBuilder rb{ctx, 4};
924 rb.Push(RESULT_SUCCESS);
925 rb.PushEnum(AccessLogVersion::Latest);
926 rb.Push(access_log_program_index);
927}
928
905} // namespace Service::FileSystem 929} // namespace Service::FileSystem
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h
index d7572ba7a..b5486a193 100644
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -7,15 +7,32 @@
7#include <memory> 7#include <memory>
8#include "core/hle/service/service.h" 8#include "core/hle/service/service.h"
9 9
10namespace Core {
11class Reporter;
12}
13
10namespace FileSys { 14namespace FileSys {
11class FileSystemBackend; 15class FileSystemBackend;
12} 16}
13 17
14namespace Service::FileSystem { 18namespace Service::FileSystem {
15 19
20enum class AccessLogVersion : u32 {
21 V7_0_0 = 2,
22
23 Latest = V7_0_0,
24};
25
26enum class LogMode : u32 {
27 Off,
28 Log,
29 RedirectToSdCard,
30 LogToSdCard = Log | RedirectToSdCard,
31};
32
16class FSP_SRV final : public ServiceFramework<FSP_SRV> { 33class FSP_SRV final : public ServiceFramework<FSP_SRV> {
17public: 34public:
18 explicit FSP_SRV(); 35 explicit FSP_SRV(const Core::Reporter& reporter);
19 ~FSP_SRV() override; 36 ~FSP_SRV() override;
20 37
21private: 38private:
@@ -26,13 +43,20 @@ private:
26 void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx); 43 void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx);
27 void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx); 44 void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx);
28 void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx); 45 void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx);
46 void SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx);
29 void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); 47 void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx);
30 void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); 48 void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
31 void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx); 49 void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx);
32 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); 50 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
51 void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx);
52 void GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx);
33 53
34 FileSys::VirtualFile romfs; 54 FileSys::VirtualFile romfs;
35 u64 current_process_id = 0; 55 u64 current_process_id = 0;
56 u32 access_log_program_index = 0;
57 LogMode log_mode = LogMode::LogToSdCard;
58
59 const Core::Reporter& reporter;
36}; 60};
37 61
38} // namespace Service::FileSystem 62} // namespace Service::FileSystem
diff --git a/src/core/hle/service/friend/errors.h b/src/core/hle/service/friend/errors.h
new file mode 100644
index 000000000..b3996e275
--- /dev/null
+++ b/src/core/hle/service/friend/errors.h
@@ -0,0 +1,12 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::Friend {
10
11constexpr ResultCode ERR_NO_NOTIFICATIONS{ErrorModule::Account, 15};
12}
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index 5100e376c..d1ec12ef9 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -2,8 +2,13 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <queue>
5#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "common/uuid.h"
6#include "core/hle/ipc_helpers.h" 8#include "core/hle/ipc_helpers.h"
9#include "core/hle/kernel/readable_event.h"
10#include "core/hle/kernel/writable_event.h"
11#include "core/hle/service/friend/errors.h"
7#include "core/hle/service/friend/friend.h" 12#include "core/hle/service/friend/friend.h"
8#include "core/hle/service/friend/interface.h" 13#include "core/hle/service/friend/interface.h"
9 14
@@ -17,7 +22,7 @@ public:
17 {0, nullptr, "GetCompletionEvent"}, 22 {0, nullptr, "GetCompletionEvent"},
18 {1, nullptr, "Cancel"}, 23 {1, nullptr, "Cancel"},
19 {10100, nullptr, "GetFriendListIds"}, 24 {10100, nullptr, "GetFriendListIds"},
20 {10101, nullptr, "GetFriendList"}, 25 {10101, &IFriendService::GetFriendList, "GetFriendList"},
21 {10102, nullptr, "UpdateFriendInfo"}, 26 {10102, nullptr, "UpdateFriendInfo"},
22 {10110, nullptr, "GetFriendProfileImage"}, 27 {10110, nullptr, "GetFriendProfileImage"},
23 {10200, nullptr, "SendFriendRequestForApplication"}, 28 {10200, nullptr, "SendFriendRequestForApplication"},
@@ -94,6 +99,23 @@ public:
94 } 99 }
95 100
96private: 101private:
102 enum class PresenceFilter : u32 {
103 None = 0,
104 Online = 1,
105 OnlinePlay = 2,
106 OnlineOrOnlinePlay = 3,
107 };
108
109 struct SizedFriendFilter {
110 PresenceFilter presence;
111 u8 is_favorite;
112 u8 same_app;
113 u8 same_app_played;
114 u8 arbitary_app_played;
115 u64 group_id;
116 };
117 static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size");
118
97 void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) { 119 void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) {
98 // Stub used by Splatoon 2 120 // Stub used by Splatoon 2
99 LOG_WARNING(Service_ACC, "(STUBBED) called"); 121 LOG_WARNING(Service_ACC, "(STUBBED) called");
@@ -107,6 +129,121 @@ private:
107 IPC::ResponseBuilder rb{ctx, 2}; 129 IPC::ResponseBuilder rb{ctx, 2};
108 rb.Push(RESULT_SUCCESS); 130 rb.Push(RESULT_SUCCESS);
109 } 131 }
132
133 void GetFriendList(Kernel::HLERequestContext& ctx) {
134 IPC::RequestParser rp{ctx};
135 const auto friend_offset = rp.Pop<u32>();
136 const auto uuid = rp.PopRaw<Common::UUID>();
137 [[maybe_unused]] const auto filter = rp.PopRaw<SizedFriendFilter>();
138 const auto pid = rp.Pop<u64>();
139 LOG_WARNING(Service_ACC, "(STUBBED) called, offset={}, uuid={}, pid={}", friend_offset,
140 uuid.Format(), pid);
141
142 IPC::ResponseBuilder rb{ctx, 3};
143 rb.Push(RESULT_SUCCESS);
144
145 rb.Push<u32>(0); // Friend count
146 // TODO(ogniK): Return a buffer of u64s which are the "NetworkServiceAccountId"
147 }
148};
149
150class INotificationService final : public ServiceFramework<INotificationService> {
151public:
152 INotificationService(Common::UUID uuid) : ServiceFramework("INotificationService"), uuid(uuid) {
153 // clang-format off
154 static const FunctionInfo functions[] = {
155 {0, &INotificationService::GetEvent, "GetEvent"},
156 {1, &INotificationService::Clear, "Clear"},
157 {2, &INotificationService::Pop, "Pop"}
158 };
159 // clang-format on
160
161 RegisterHandlers(functions);
162 }
163
164private:
165 void GetEvent(Kernel::HLERequestContext& ctx) {
166 LOG_DEBUG(Service_ACC, "called");
167
168 IPC::ResponseBuilder rb{ctx, 2, 1};
169 rb.Push(RESULT_SUCCESS);
170
171 if (!is_event_created) {
172 auto& kernel = Core::System::GetInstance().Kernel();
173 notification_event = Kernel::WritableEvent::CreateEventPair(
174 kernel, Kernel::ResetType::Manual, "INotificationService:NotifyEvent");
175 is_event_created = true;
176 }
177 rb.PushCopyObjects(notification_event.readable);
178 }
179
180 void Clear(Kernel::HLERequestContext& ctx) {
181 LOG_DEBUG(Service_ACC, "called");
182 while (!notifications.empty()) {
183 notifications.pop();
184 }
185 std::memset(&states, 0, sizeof(States));
186
187 IPC::ResponseBuilder rb{ctx, 2};
188 rb.Push(RESULT_SUCCESS);
189 }
190
191 void Pop(Kernel::HLERequestContext& ctx) {
192 LOG_DEBUG(Service_ACC, "called");
193
194 if (notifications.empty()) {
195 LOG_ERROR(Service_ACC, "No notifications in queue!");
196 IPC::ResponseBuilder rb{ctx, 2};
197 rb.Push(ERR_NO_NOTIFICATIONS);
198 return;
199 }
200
201 const auto notification = notifications.front();
202 notifications.pop();
203
204 switch (notification.notification_type) {
205 case NotificationTypes::HasUpdatedFriendsList:
206 states.has_updated_friends = false;
207 break;
208 case NotificationTypes::HasReceivedFriendRequest:
209 states.has_received_friend_request = false;
210 break;
211 default:
212 // HOS seems not have an error case for an unknown notification
213 LOG_WARNING(Service_ACC, "Unknown notification {:08X}",
214 static_cast<u32>(notification.notification_type));
215 break;
216 }
217
218 IPC::ResponseBuilder rb{ctx, 6};
219 rb.Push(RESULT_SUCCESS);
220 rb.PushRaw<SizedNotificationInfo>(notification);
221 }
222
223 enum class NotificationTypes : u32 {
224 HasUpdatedFriendsList = 0x65,
225 HasReceivedFriendRequest = 0x1
226 };
227
228 struct SizedNotificationInfo {
229 NotificationTypes notification_type;
230 INSERT_PADDING_WORDS(
231 1); // TODO(ogniK): This doesn't seem to be used within any IPC returns as of now
232 u64_le account_id;
233 };
234 static_assert(sizeof(SizedNotificationInfo) == 0x10,
235 "SizedNotificationInfo is an incorrect size");
236
237 struct States {
238 bool has_updated_friends;
239 bool has_received_friend_request;
240 };
241
242 Common::UUID uuid;
243 bool is_event_created = false;
244 Kernel::EventPair notification_event;
245 std::queue<SizedNotificationInfo> notifications;
246 States states{};
110}; 247};
111 248
112void Module::Interface::CreateFriendService(Kernel::HLERequestContext& ctx) { 249void Module::Interface::CreateFriendService(Kernel::HLERequestContext& ctx) {
@@ -116,6 +253,17 @@ void Module::Interface::CreateFriendService(Kernel::HLERequestContext& ctx) {
116 LOG_DEBUG(Service_ACC, "called"); 253 LOG_DEBUG(Service_ACC, "called");
117} 254}
118 255
256void Module::Interface::CreateNotificationService(Kernel::HLERequestContext& ctx) {
257 IPC::RequestParser rp{ctx};
258 auto uuid = rp.PopRaw<Common::UUID>();
259
260 LOG_DEBUG(Service_ACC, "called, uuid={}", uuid.Format());
261
262 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
263 rb.Push(RESULT_SUCCESS);
264 rb.PushIpcInterface<INotificationService>(uuid);
265}
266
119Module::Interface::Interface(std::shared_ptr<Module> module, const char* name) 267Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
120 : ServiceFramework(name), module(std::move(module)) {} 268 : ServiceFramework(name), module(std::move(module)) {}
121 269
diff --git a/src/core/hle/service/friend/friend.h b/src/core/hle/service/friend/friend.h
index e762840cb..38d05fa8e 100644
--- a/src/core/hle/service/friend/friend.h
+++ b/src/core/hle/service/friend/friend.h
@@ -16,6 +16,7 @@ public:
16 ~Interface() override; 16 ~Interface() override;
17 17
18 void CreateFriendService(Kernel::HLERequestContext& ctx); 18 void CreateFriendService(Kernel::HLERequestContext& ctx);
19 void CreateNotificationService(Kernel::HLERequestContext& ctx);
19 20
20 protected: 21 protected:
21 std::shared_ptr<Module> module; 22 std::shared_ptr<Module> module;
diff --git a/src/core/hle/service/friend/interface.cpp b/src/core/hle/service/friend/interface.cpp
index 5a6840af5..5b384f733 100644
--- a/src/core/hle/service/friend/interface.cpp
+++ b/src/core/hle/service/friend/interface.cpp
@@ -10,7 +10,7 @@ Friend::Friend(std::shared_ptr<Module> module, const char* name)
10 : Interface(std::move(module), name) { 10 : Interface(std::move(module), name) {
11 static const FunctionInfo functions[] = { 11 static const FunctionInfo functions[] = {
12 {0, &Friend::CreateFriendService, "CreateFriendService"}, 12 {0, &Friend::CreateFriendService, "CreateFriendService"},
13 {1, nullptr, "CreateNotificationService"}, 13 {1, &Friend::CreateNotificationService, "CreateNotificationService"},
14 {2, nullptr, "CreateDaemonSuspendSessionService"}, 14 {2, nullptr, "CreateDaemonSuspendSessionService"},
15 }; 15 };
16 RegisterHandlers(functions); 16 RegisterHandlers(functions);
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index fdd6d79a2..e47fe8188 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -548,6 +548,37 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) {
548 connected_controllers[NPadIdToIndex(npad_id)].is_connected = false; 548 connected_controllers[NPadIdToIndex(npad_id)].is_connected = false;
549} 549}
550 550
551void Controller_NPad::StartLRAssignmentMode() {
552 // Nothing internally is used for lr assignment mode. Since we have the ability to set the
553 // controller types from boot, it doesn't really matter about showing a selection screen
554 is_in_lr_assignment_mode = true;
555}
556
557void Controller_NPad::StopLRAssignmentMode() {
558 is_in_lr_assignment_mode = false;
559}
560
561bool Controller_NPad::SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2) {
562 if (npad_id_1 == NPAD_HANDHELD || npad_id_2 == NPAD_HANDHELD || npad_id_1 == NPAD_UNKNOWN ||
563 npad_id_2 == NPAD_UNKNOWN) {
564 return true;
565 }
566 const auto npad_index_1 = NPadIdToIndex(npad_id_1);
567 const auto npad_index_2 = NPadIdToIndex(npad_id_2);
568
569 if (!IsControllerSupported(connected_controllers[npad_index_1].type) ||
570 !IsControllerSupported(connected_controllers[npad_index_2].type)) {
571 return false;
572 }
573
574 std::swap(connected_controllers[npad_index_1].type, connected_controllers[npad_index_2].type);
575
576 InitNewlyAddedControler(npad_index_1);
577 InitNewlyAddedControler(npad_index_2);
578
579 return true;
580}
581
551bool Controller_NPad::IsControllerSupported(NPadControllerType controller) { 582bool Controller_NPad::IsControllerSupported(NPadControllerType controller) {
552 if (controller == NPadControllerType::Handheld) { 583 if (controller == NPadControllerType::Handheld) {
553 // Handheld is not even a supported type, lets stop here 584 // Handheld is not even a supported type, lets stop here
@@ -605,10 +636,15 @@ Controller_NPad::LedPattern Controller_NPad::GetLedPattern(u32 npad_id) {
605 return LedPattern{0, 0, 0, 0}; 636 return LedPattern{0, 0, 0, 0};
606 }; 637 };
607} 638}
639
608void Controller_NPad::SetVibrationEnabled(bool can_vibrate) { 640void Controller_NPad::SetVibrationEnabled(bool can_vibrate) {
609 can_controllers_vibrate = can_vibrate; 641 can_controllers_vibrate = can_vibrate;
610} 642}
611 643
644bool Controller_NPad::IsVibrationEnabled() const {
645 return can_controllers_vibrate;
646}
647
612void Controller_NPad::ClearAllConnectedControllers() { 648void Controller_NPad::ClearAllConnectedControllers() {
613 for (auto& controller : connected_controllers) { 649 for (auto& controller : connected_controllers) {
614 if (controller.is_connected && controller.type != NPadControllerType::None) { 650 if (controller.is_connected && controller.type != NPadControllerType::None) {
@@ -617,6 +653,7 @@ void Controller_NPad::ClearAllConnectedControllers() {
617 } 653 }
618 } 654 }
619} 655}
656
620void Controller_NPad::DisconnectAllConnectedControllers() { 657void Controller_NPad::DisconnectAllConnectedControllers() {
621 std::for_each(connected_controllers.begin(), connected_controllers.end(), 658 std::for_each(connected_controllers.begin(), connected_controllers.end(),
622 [](ControllerHolder& controller) { controller.is_connected = false; }); 659 [](ControllerHolder& controller) { controller.is_connected = false; });
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 4ff50b3cd..f28b36806 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -119,11 +119,16 @@ public:
119 void DisconnectNPad(u32 npad_id); 119 void DisconnectNPad(u32 npad_id);
120 LedPattern GetLedPattern(u32 npad_id); 120 LedPattern GetLedPattern(u32 npad_id);
121 void SetVibrationEnabled(bool can_vibrate); 121 void SetVibrationEnabled(bool can_vibrate);
122 bool IsVibrationEnabled() const;
122 void ClearAllConnectedControllers(); 123 void ClearAllConnectedControllers();
123 void DisconnectAllConnectedControllers(); 124 void DisconnectAllConnectedControllers();
124 void ConnectAllDisconnectedControllers(); 125 void ConnectAllDisconnectedControllers();
125 void ClearAllControllers(); 126 void ClearAllControllers();
126 127
128 void StartLRAssignmentMode();
129 void StopLRAssignmentMode();
130 bool SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2);
131
127 // Logical OR for all buttons presses on all controllers 132 // Logical OR for all buttons presses on all controllers
128 // Specifically for cheat engine and other features. 133 // Specifically for cheat engine and other features.
129 u32 GetAndResetPressState(); 134 u32 GetAndResetPressState();
@@ -321,5 +326,6 @@ private:
321 void RequestPadStateUpdate(u32 npad_id); 326 void RequestPadStateUpdate(u32 npad_id);
322 std::array<ControllerPad, 10> npad_pad_states{}; 327 std::array<ControllerPad, 10> npad_pad_states{};
323 bool IsControllerSupported(NPadControllerType controller); 328 bool IsControllerSupported(NPadControllerType controller);
329 bool is_in_lr_assignment_mode{false};
324}; 330};
325} // namespace Service::HID 331} // namespace Service::HID
diff --git a/src/core/hle/service/hid/errors.h b/src/core/hle/service/hid/errors.h
new file mode 100644
index 000000000..3583642e7
--- /dev/null
+++ b/src/core/hle/service/hid/errors.h
@@ -0,0 +1,13 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::HID {
10
11constexpr ResultCode ERR_NPAD_NOT_CONNECTED{ErrorModule::HID, 710};
12
13} // namespace Service::HID
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index a4ad95d96..f8b1ca816 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -16,6 +16,7 @@
16#include "core/hle/kernel/readable_event.h" 16#include "core/hle/kernel/readable_event.h"
17#include "core/hle/kernel/shared_memory.h" 17#include "core/hle/kernel/shared_memory.h"
18#include "core/hle/kernel/writable_event.h" 18#include "core/hle/kernel/writable_event.h"
19#include "core/hle/service/hid/errors.h"
19#include "core/hle/service/hid/hid.h" 20#include "core/hle/service/hid/hid.h"
20#include "core/hle/service/hid/irs.h" 21#include "core/hle/service/hid/irs.h"
21#include "core/hle/service/hid/xcd.h" 22#include "core/hle/service/hid/xcd.h"
@@ -202,11 +203,11 @@ Hid::Hid() : ServiceFramework("hid") {
202 {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"}, 203 {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"},
203 {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"}, 204 {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"},
204 {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"}, 205 {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"},
205 {126, nullptr, "StartLrAssignmentMode"}, 206 {126, &Hid::StartLrAssignmentMode, "StartLrAssignmentMode"},
206 {127, nullptr, "StopLrAssignmentMode"}, 207 {127, &Hid::StopLrAssignmentMode, "StopLrAssignmentMode"},
207 {128, &Hid::SetNpadHandheldActivationMode, "SetNpadHandheldActivationMode"}, 208 {128, &Hid::SetNpadHandheldActivationMode, "SetNpadHandheldActivationMode"},
208 {129, nullptr, "GetNpadHandheldActivationMode"}, 209 {129, nullptr, "GetNpadHandheldActivationMode"},
209 {130, nullptr, "SwapNpadAssignment"}, 210 {130, &Hid::SwapNpadAssignment, "SwapNpadAssignment"},
210 {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"}, 211 {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"},
211 {132, nullptr, "EnableUnintendedHomeButtonInputProtection"}, 212 {132, nullptr, "EnableUnintendedHomeButtonInputProtection"},
212 {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"}, 213 {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"},
@@ -215,8 +216,8 @@ Hid::Hid() : ServiceFramework("hid") {
215 {201, &Hid::SendVibrationValue, "SendVibrationValue"}, 216 {201, &Hid::SendVibrationValue, "SendVibrationValue"},
216 {202, &Hid::GetActualVibrationValue, "GetActualVibrationValue"}, 217 {202, &Hid::GetActualVibrationValue, "GetActualVibrationValue"},
217 {203, &Hid::CreateActiveVibrationDeviceList, "CreateActiveVibrationDeviceList"}, 218 {203, &Hid::CreateActiveVibrationDeviceList, "CreateActiveVibrationDeviceList"},
218 {204, nullptr, "PermitVibration"}, 219 {204, &Hid::PermitVibration, "PermitVibration"},
219 {205, nullptr, "IsVibrationPermitted"}, 220 {205, &Hid::IsVibrationPermitted, "IsVibrationPermitted"},
220 {206, &Hid::SendVibrationValues, "SendVibrationValues"}, 221 {206, &Hid::SendVibrationValues, "SendVibrationValues"},
221 {207, nullptr, "SendVibrationGcErmCommand"}, 222 {207, nullptr, "SendVibrationGcErmCommand"},
222 {208, nullptr, "GetActualVibrationGcErmCommand"}, 223 {208, nullptr, "GetActualVibrationGcErmCommand"},
@@ -678,6 +679,27 @@ void Hid::CreateActiveVibrationDeviceList(Kernel::HLERequestContext& ctx) {
678 rb.PushIpcInterface<IActiveVibrationDeviceList>(); 679 rb.PushIpcInterface<IActiveVibrationDeviceList>();
679} 680}
680 681
682void Hid::PermitVibration(Kernel::HLERequestContext& ctx) {
683 IPC::RequestParser rp{ctx};
684 const auto can_vibrate{rp.Pop<bool>()};
685 applet_resource->GetController<Controller_NPad>(HidController::NPad)
686 .SetVibrationEnabled(can_vibrate);
687
688 LOG_DEBUG(Service_HID, "called, can_vibrate={}", can_vibrate);
689
690 IPC::ResponseBuilder rb{ctx, 2};
691 rb.Push(RESULT_SUCCESS);
692}
693
694void Hid::IsVibrationPermitted(Kernel::HLERequestContext& ctx) {
695 LOG_DEBUG(Service_HID, "called");
696
697 IPC::ResponseBuilder rb{ctx, 3};
698 rb.Push(RESULT_SUCCESS);
699 rb.Push(
700 applet_resource->GetController<Controller_NPad>(HidController::NPad).IsVibrationEnabled());
701}
702
681void Hid::ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) { 703void Hid::ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
682 IPC::RequestParser rp{ctx}; 704 IPC::RequestParser rp{ctx};
683 const auto applet_resource_user_id{rp.Pop<u64>()}; 705 const auto applet_resource_user_id{rp.Pop<u64>()};
@@ -733,6 +755,49 @@ void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
733 rb.Push(RESULT_SUCCESS); 755 rb.Push(RESULT_SUCCESS);
734} 756}
735 757
758void Hid::StartLrAssignmentMode(Kernel::HLERequestContext& ctx) {
759 IPC::RequestParser rp{ctx};
760 const auto applet_resource_user_id{rp.Pop<u64>()};
761
762 LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
763 auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
764 controller.StartLRAssignmentMode();
765
766 IPC::ResponseBuilder rb{ctx, 2};
767 rb.Push(RESULT_SUCCESS);
768}
769
770void Hid::StopLrAssignmentMode(Kernel::HLERequestContext& ctx) {
771 IPC::RequestParser rp{ctx};
772 const auto applet_resource_user_id{rp.Pop<u64>()};
773
774 LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
775 auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
776 controller.StopLRAssignmentMode();
777
778 IPC::ResponseBuilder rb{ctx, 2};
779 rb.Push(RESULT_SUCCESS);
780}
781
782void Hid::SwapNpadAssignment(Kernel::HLERequestContext& ctx) {
783 IPC::RequestParser rp{ctx};
784 const auto npad_1{rp.Pop<u32>()};
785 const auto npad_2{rp.Pop<u32>()};
786 const auto applet_resource_user_id{rp.Pop<u64>()};
787
788 LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}, npad_1={}, npad_2={}",
789 applet_resource_user_id, npad_1, npad_2);
790
791 auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
792 IPC::ResponseBuilder rb{ctx, 2};
793 if (controller.SwapNpadAssignment(npad_1, npad_2)) {
794 rb.Push(RESULT_SUCCESS);
795 } else {
796 LOG_ERROR(Service_HID, "Npads are not connected!");
797 rb.Push(ERR_NPAD_NOT_CONNECTED);
798 }
799}
800
736class HidDbg final : public ServiceFramework<HidDbg> { 801class HidDbg final : public ServiceFramework<HidDbg> {
737public: 802public:
738 explicit HidDbg() : ServiceFramework{"hid:dbg"} { 803 explicit HidDbg() : ServiceFramework{"hid:dbg"} {
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index d3660cad2..2fd6d9fc7 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -114,11 +114,16 @@ private:
114 void SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx); 114 void SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx);
115 void GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx); 115 void GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx);
116 void CreateActiveVibrationDeviceList(Kernel::HLERequestContext& ctx); 116 void CreateActiveVibrationDeviceList(Kernel::HLERequestContext& ctx);
117 void PermitVibration(Kernel::HLERequestContext& ctx);
118 void IsVibrationPermitted(Kernel::HLERequestContext& ctx);
117 void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); 119 void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
118 void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); 120 void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
119 void StopSixAxisSensor(Kernel::HLERequestContext& ctx); 121 void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
120 void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); 122 void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
121 void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); 123 void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
124 void StartLrAssignmentMode(Kernel::HLERequestContext& ctx);
125 void StopLrAssignmentMode(Kernel::HLERequestContext& ctx);
126 void SwapNpadAssignment(Kernel::HLERequestContext& ctx);
122 127
123 std::shared_ptr<IAppletResource> applet_resource; 128 std::shared_ptr<IAppletResource> applet_resource;
124}; 129};
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index b839303ac..8ddad8682 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -345,14 +345,16 @@ public:
345 vm_manager 345 vm_manager
346 .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode) 346 .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode)
347 .IsSuccess()); 347 .IsSuccess());
348 ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); 348 ASSERT(vm_manager.ReprotectRange(nro_address, nro_size, Kernel::VMAPermission::None)
349 .IsSuccess());
349 350
350 if (bss_size > 0) { 351 if (bss_size > 0) {
351 ASSERT(vm_manager 352 ASSERT(vm_manager
352 .MirrorMemory(*map_address + nro_size, bss_address, bss_size, 353 .MirrorMemory(*map_address + nro_size, bss_address, bss_size,
353 Kernel::MemoryState::ModuleCode) 354 Kernel::MemoryState::ModuleCode)
354 .IsSuccess()); 355 .IsSuccess());
355 ASSERT(vm_manager.UnmapRange(bss_address, bss_size).IsSuccess()); 356 ASSERT(vm_manager.ReprotectRange(bss_address, bss_size, Kernel::VMAPermission::None)
357 .IsSuccess());
356 } 358 }
357 359
358 vm_manager.ReprotectRange(*map_address, header.text_size, 360 vm_manager.ReprotectRange(*map_address, header.text_size,
@@ -364,7 +366,8 @@ public:
364 366
365 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 367 Core::System::GetInstance().InvalidateCpuInstructionCaches();
366 368
367 nro.insert_or_assign(*map_address, NROInfo{hash, nro_size + bss_size}); 369 nro.insert_or_assign(*map_address,
370 NROInfo{hash, nro_address, nro_size, bss_address, bss_size});
368 371
369 IPC::ResponseBuilder rb{ctx, 4}; 372 IPC::ResponseBuilder rb{ctx, 4};
370 rb.Push(RESULT_SUCCESS); 373 rb.Push(RESULT_SUCCESS);
@@ -409,9 +412,23 @@ public:
409 } 412 }
410 413
411 auto& vm_manager = Core::CurrentProcess()->VMManager(); 414 auto& vm_manager = Core::CurrentProcess()->VMManager();
412 const auto& nro_size = iter->second.size; 415 const auto& nro_info = iter->second;
413 416
414 ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); 417 // Unmap the mirrored memory
418 ASSERT(
419 vm_manager.UnmapRange(nro_address, nro_info.nro_size + nro_info.bss_size).IsSuccess());
420
421 // Reprotect the source memory
422 ASSERT(vm_manager
423 .ReprotectRange(nro_info.nro_address, nro_info.nro_size,
424 Kernel::VMAPermission::ReadWrite)
425 .IsSuccess());
426 if (nro_info.bss_size > 0) {
427 ASSERT(vm_manager
428 .ReprotectRange(nro_info.bss_address, nro_info.bss_size,
429 Kernel::VMAPermission::ReadWrite)
430 .IsSuccess());
431 }
415 432
416 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 433 Core::System::GetInstance().InvalidateCpuInstructionCaches();
417 434
@@ -473,7 +490,10 @@ private:
473 490
474 struct NROInfo { 491 struct NROInfo {
475 SHA256Hash hash; 492 SHA256Hash hash;
476 u64 size; 493 VAddr nro_address;
494 u64 nro_size;
495 VAddr bss_address;
496 u64 bss_size;
477 }; 497 };
478 498
479 bool initialized = false; 499 bool initialized = false;
diff --git a/src/core/hle/service/mii/mii.cpp b/src/core/hle/service/mii/mii.cpp
index ce84e25ed..0b3923ad9 100644
--- a/src/core/hle/service/mii/mii.cpp
+++ b/src/core/hle/service/mii/mii.cpp
@@ -48,7 +48,7 @@ public:
48 {19, nullptr, "Export"}, 48 {19, nullptr, "Export"},
49 {20, nullptr, "IsBrokenDatabaseWithClearFlag"}, 49 {20, nullptr, "IsBrokenDatabaseWithClearFlag"},
50 {21, &IDatabaseService::GetIndex, "GetIndex"}, 50 {21, &IDatabaseService::GetIndex, "GetIndex"},
51 {22, nullptr, "SetInterfaceVersion"}, 51 {22, &IDatabaseService::SetInterfaceVersion, "SetInterfaceVersion"},
52 {23, nullptr, "Convert"}, 52 {23, nullptr, "Convert"},
53 }; 53 };
54 // clang-format on 54 // clang-format on
@@ -350,8 +350,22 @@ private:
350 rb.Push(index); 350 rb.Push(index);
351 } 351 }
352 352
353 void SetInterfaceVersion(Kernel::HLERequestContext& ctx) {
354 IPC::RequestParser rp{ctx};
355 current_interface_version = rp.PopRaw<u32>();
356
357 LOG_DEBUG(Service_Mii, "called, interface_version={:08X}", current_interface_version);
358
359 UNIMPLEMENTED_IF(current_interface_version != 1);
360
361 IPC::ResponseBuilder rb{ctx, 2};
362 rb.Push(RESULT_SUCCESS);
363 }
364
353 MiiManager db; 365 MiiManager db;
354 366
367 u32 current_interface_version = 0;
368
355 // Last read offsets of Get functions 369 // Last read offsets of Get functions
356 std::array<u32, 4> offsets{}; 370 std::array<u32, 4> offsets{};
357}; 371};
diff --git a/src/core/hle/service/mii/mii_manager.cpp b/src/core/hle/service/mii/mii_manager.cpp
index 131b01d62..8d0353075 100644
--- a/src/core/hle/service/mii/mii_manager.cpp
+++ b/src/core/hle/service/mii/mii_manager.cpp
@@ -175,6 +175,10 @@ MiiStoreData ConvertInfoToStoreData(const MiiInfo& info) {
175} // namespace 175} // namespace
176 176
177std::ostream& operator<<(std::ostream& os, Source source) { 177std::ostream& operator<<(std::ostream& os, Source source) {
178 if (static_cast<std::size_t>(source) >= SOURCE_NAMES.size()) {
179 return os << "[UNKNOWN SOURCE]";
180 }
181
178 os << SOURCE_NAMES.at(static_cast<std::size_t>(source)); 182 os << SOURCE_NAMES.at(static_cast<std::size_t>(source));
179 return os; 183 return os;
180} 184}
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp
index ad176f89d..2a522136d 100644
--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -77,7 +77,7 @@ enum class LoadState : u32 {
77 Done = 1, 77 Done = 1,
78}; 78};
79 79
80static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, 80static void DecryptSharedFont(const std::vector<u32>& input, Kernel::PhysicalMemory& output,
81 std::size_t& offset) { 81 std::size_t& offset) {
82 ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, 82 ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE,
83 "Shared fonts exceeds 17mb!"); 83 "Shared fonts exceeds 17mb!");
@@ -94,7 +94,7 @@ static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& ou
94 offset += transformed_font.size() * sizeof(u32); 94 offset += transformed_font.size() * sizeof(u32);
95} 95}
96 96
97static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& output, 97static void EncryptSharedFont(const std::vector<u8>& input, Kernel::PhysicalMemory& output,
98 std::size_t& offset) { 98 std::size_t& offset) {
99 ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!"); 99 ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!");
100 const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT; 100 const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT;
@@ -121,7 +121,7 @@ struct PL_U::Impl {
121 return shared_font_regions.at(index); 121 return shared_font_regions.at(index);
122 } 122 }
123 123
124 void BuildSharedFontsRawRegions(const std::vector<u8>& input) { 124 void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) {
125 // As we can derive the xor key we can just populate the offsets 125 // As we can derive the xor key we can just populate the offsets
126 // based on the shared memory dump 126 // based on the shared memory dump
127 unsigned cur_offset = 0; 127 unsigned cur_offset = 0;
@@ -144,7 +144,7 @@ struct PL_U::Impl {
144 Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; 144 Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem;
145 145
146 /// Backing memory for the shared font data 146 /// Backing memory for the shared font data
147 std::shared_ptr<std::vector<u8>> shared_font; 147 std::shared_ptr<Kernel::PhysicalMemory> shared_font;
148 148
149 // Automatically populated based on shared_fonts dump or system archives. 149 // Automatically populated based on shared_fonts dump or system archives.
150 std::vector<FontRegion> shared_font_regions; 150 std::vector<FontRegion> shared_font_regions;
@@ -166,7 +166,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
166 // Rebuild shared fonts from data ncas 166 // Rebuild shared fonts from data ncas
167 if (nand->HasEntry(static_cast<u64>(FontArchives::Standard), 167 if (nand->HasEntry(static_cast<u64>(FontArchives::Standard),
168 FileSys::ContentRecordType::Data)) { 168 FileSys::ContentRecordType::Data)) {
169 impl->shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE); 169 impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(SHARED_FONT_MEM_SIZE);
170 for (auto font : SHARED_FONTS) { 170 for (auto font : SHARED_FONTS) {
171 const auto nca = 171 const auto nca =
172 nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data); 172 nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data);
@@ -207,7 +207,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
207 } 207 }
208 208
209 } else { 209 } else {
210 impl->shared_font = std::make_shared<std::vector<u8>>( 210 impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(
211 SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size 211 SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size
212 212
213 const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir); 213 const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir);
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index 4f6042b00..5b8248433 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -8,6 +8,11 @@
8#include "common/bit_field.h" 8#include "common/bit_field.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/swap.h" 10#include "common/swap.h"
11#include "core/hle/service/nvdrv/nvdata.h"
12
13namespace Core {
14class System;
15}
11 16
12namespace Service::Nvidia::Devices { 17namespace Service::Nvidia::Devices {
13 18
@@ -15,7 +20,7 @@ namespace Service::Nvidia::Devices {
15/// implement the ioctl interface. 20/// implement the ioctl interface.
16class nvdevice { 21class nvdevice {
17public: 22public:
18 nvdevice() = default; 23 explicit nvdevice(Core::System& system) : system{system} {};
19 virtual ~nvdevice() = default; 24 virtual ~nvdevice() = default;
20 union Ioctl { 25 union Ioctl {
21 u32_le raw; 26 u32_le raw;
@@ -33,7 +38,11 @@ public:
33 * @param output A buffer where the output data will be written to. 38 * @param output A buffer where the output data will be written to.
34 * @returns The result code of the ioctl. 39 * @returns The result code of the ioctl.
35 */ 40 */
36 virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) = 0; 41 virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
42 IoctlCtrl& ctrl) = 0;
43
44protected:
45 Core::System& system;
37}; 46};
38 47
39} // namespace Service::Nvidia::Devices 48} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 20c7c39aa..926a1285d 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -13,10 +13,12 @@
13 13
14namespace Service::Nvidia::Devices { 14namespace Service::Nvidia::Devices {
15 15
16nvdisp_disp0::nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} 16nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
17 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
17nvdisp_disp0 ::~nvdisp_disp0() = default; 18nvdisp_disp0 ::~nvdisp_disp0() = default;
18 19
19u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 20u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
21 IoctlCtrl& ctrl) {
20 UNIMPLEMENTED_MSG("Unimplemented ioctl"); 22 UNIMPLEMENTED_MSG("Unimplemented ioctl");
21 return 0; 23 return 0;
22} 24}
@@ -34,9 +36,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
34 addr, offset, width, height, stride, static_cast<PixelFormat>(format), 36 addr, offset, width, height, stride, static_cast<PixelFormat>(format),
35 transform, crop_rect}; 37 transform, crop_rect};
36 38
37 auto& instance = Core::System::GetInstance(); 39 system.GetPerfStats().EndGameFrame();
38 instance.GetPerfStats().EndGameFrame(); 40 system.GPU().SwapBuffers(&framebuffer);
39 instance.GPU().SwapBuffers(framebuffer);
40} 41}
41 42
42} // namespace Service::Nvidia::Devices 43} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index 12f3ef825..e79e490ff 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -17,10 +17,11 @@ class nvmap;
17 17
18class nvdisp_disp0 final : public nvdevice { 18class nvdisp_disp0 final : public nvdevice {
19public: 19public:
20 explicit nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev); 20 explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
21 ~nvdisp_disp0() override; 21 ~nvdisp_disp0() override;
22 22
23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
24 IoctlCtrl& ctrl) override;
24 25
25 /// Performs a screen flip, drawing the buffer pointed to by the handle. 26 /// Performs a screen flip, drawing the buffer pointed to by the handle.
26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, 27 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index af62d33d2..24ab3f2e9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -22,10 +22,12 @@ enum {
22}; 22};
23} 23}
24 24
25nvhost_as_gpu::nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} 25nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
26 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
26nvhost_as_gpu::~nvhost_as_gpu() = default; 27nvhost_as_gpu::~nvhost_as_gpu() = default;
27 28
28u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 29u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
30 IoctlCtrl& ctrl) {
29 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 31 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
30 command.raw, input.size(), output.size()); 32 command.raw, input.size(), output.size());
31 33
@@ -65,7 +67,7 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>&
65 LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, 67 LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages,
66 params.page_size, params.flags); 68 params.page_size, params.flags);
67 69
68 auto& gpu = Core::System::GetInstance().GPU(); 70 auto& gpu = system.GPU();
69 const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; 71 const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
70 if (params.flags & 1) { 72 if (params.flags & 1) {
71 params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1); 73 params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1);
@@ -85,7 +87,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
85 std::vector<IoctlRemapEntry> entries(num_entries); 87 std::vector<IoctlRemapEntry> entries(num_entries);
86 std::memcpy(entries.data(), input.data(), input.size()); 88 std::memcpy(entries.data(), input.data(), input.size());
87 89
88 auto& gpu = Core::System::GetInstance().GPU(); 90 auto& gpu = system.GPU();
89 for (const auto& entry : entries) { 91 for (const auto& entry : entries) {
90 LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", 92 LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
91 entry.offset, entry.nvmap_handle, entry.pages); 93 entry.offset, entry.nvmap_handle, entry.pages);
@@ -136,7 +138,7 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
136 // case to prevent unexpected behavior. 138 // case to prevent unexpected behavior.
137 ASSERT(object->id == params.nvmap_handle); 139 ASSERT(object->id == params.nvmap_handle);
138 140
139 auto& gpu = Core::System::GetInstance().GPU(); 141 auto& gpu = system.GPU();
140 142
141 if (params.flags & 1) { 143 if (params.flags & 1) {
142 params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size); 144 params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size);
@@ -173,8 +175,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
173 return 0; 175 return 0;
174 } 176 }
175 177
176 params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset, 178 params.offset = system.GPU().MemoryManager().UnmapBuffer(params.offset, itr->second.size);
177 itr->second.size);
178 buffer_mappings.erase(itr->second.offset); 179 buffer_mappings.erase(itr->second.offset);
179 180
180 std::memcpy(output.data(), &params, output.size()); 181 std::memcpy(output.data(), &params, output.size());
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index eb14b1da8..30ca5f4c3 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -17,10 +17,11 @@ class nvmap;
17 17
18class nvhost_as_gpu final : public nvdevice { 18class nvhost_as_gpu final : public nvdevice {
19public: 19public:
20 explicit nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev); 20 explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
21 ~nvhost_as_gpu() override; 21 ~nvhost_as_gpu() override;
22 22
23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
24 IoctlCtrl& ctrl) override;
24 25
25private: 26private:
26 enum class IoctlCommand : u32_le { 27 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index b39fb9ef9..9a66a5f88 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -7,14 +7,20 @@
7 7
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "core/core.h"
11#include "core/hle/kernel/readable_event.h"
12#include "core/hle/kernel/writable_event.h"
10#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" 13#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
14#include "video_core/gpu.h"
11 15
12namespace Service::Nvidia::Devices { 16namespace Service::Nvidia::Devices {
13 17
14nvhost_ctrl::nvhost_ctrl() = default; 18nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface)
19 : nvdevice(system), events_interface{events_interface} {}
15nvhost_ctrl::~nvhost_ctrl() = default; 20nvhost_ctrl::~nvhost_ctrl() = default;
16 21
17u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 22u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
23 IoctlCtrl& ctrl) {
18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 24 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
19 command.raw, input.size(), output.size()); 25 command.raw, input.size(), output.size());
20 26
@@ -22,11 +28,15 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<
22 case IoctlCommand::IocGetConfigCommand: 28 case IoctlCommand::IocGetConfigCommand:
23 return NvOsGetConfigU32(input, output); 29 return NvOsGetConfigU32(input, output);
24 case IoctlCommand::IocCtrlEventWaitCommand: 30 case IoctlCommand::IocCtrlEventWaitCommand:
25 return IocCtrlEventWait(input, output, false); 31 return IocCtrlEventWait(input, output, false, ctrl);
26 case IoctlCommand::IocCtrlEventWaitAsyncCommand: 32 case IoctlCommand::IocCtrlEventWaitAsyncCommand:
27 return IocCtrlEventWait(input, output, true); 33 return IocCtrlEventWait(input, output, true, ctrl);
28 case IoctlCommand::IocCtrlEventRegisterCommand: 34 case IoctlCommand::IocCtrlEventRegisterCommand:
29 return IocCtrlEventRegister(input, output); 35 return IocCtrlEventRegister(input, output);
36 case IoctlCommand::IocCtrlEventUnregisterCommand:
37 return IocCtrlEventUnregister(input, output);
38 case IoctlCommand::IocCtrlEventSignalCommand:
39 return IocCtrlEventSignal(input, output);
30 } 40 }
31 UNIMPLEMENTED_MSG("Unimplemented ioctl"); 41 UNIMPLEMENTED_MSG("Unimplemented ioctl");
32 return 0; 42 return 0;
@@ -41,23 +51,137 @@ u32 nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>&
41} 51}
42 52
43u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, 53u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
44 bool is_async) { 54 bool is_async, IoctlCtrl& ctrl) {
45 IocCtrlEventWaitParams params{}; 55 IocCtrlEventWaitParams params{};
46 std::memcpy(&params, input.data(), sizeof(params)); 56 std::memcpy(&params, input.data(), sizeof(params));
47 LOG_WARNING(Service_NVDRV, 57 LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}",
48 "(STUBBED) called, syncpt_id={}, threshold={}, timeout={}, is_async={}", 58 params.syncpt_id, params.threshold, params.timeout, is_async);
49 params.syncpt_id, params.threshold, params.timeout, is_async);
50 59
51 // TODO(Subv): Implement actual syncpt waiting. 60 if (params.syncpt_id >= MaxSyncPoints) {
52 params.value = 0; 61 return NvResult::BadParameter;
62 }
63
64 auto& gpu = system.GPU();
65 // This is mostly to take into account unimplemented features. As synced
66 // gpu is always synced.
67 if (!gpu.IsAsync()) {
68 return NvResult::Success;
69 }
70 auto lock = gpu.LockSync();
71 const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
72 const s32 diff = current_syncpoint_value - params.threshold;
73 if (diff >= 0) {
74 params.value = current_syncpoint_value;
75 std::memcpy(output.data(), &params, sizeof(params));
76 return NvResult::Success;
77 }
78 const u32 target_value = current_syncpoint_value - diff;
79
80 if (!is_async) {
81 params.value = 0;
82 }
83
84 if (params.timeout == 0) {
85 std::memcpy(output.data(), &params, sizeof(params));
86 return NvResult::Timeout;
87 }
88
89 u32 event_id;
90 if (is_async) {
91 event_id = params.value & 0x00FF;
92 if (event_id >= MaxNvEvents) {
93 std::memcpy(output.data(), &params, sizeof(params));
94 return NvResult::BadParameter;
95 }
96 } else {
97 if (ctrl.fresh_call) {
98 const auto result = events_interface.GetFreeEvent();
99 if (result) {
100 event_id = *result;
101 } else {
102 LOG_CRITICAL(Service_NVDRV, "No Free Events available!");
103 event_id = params.value & 0x00FF;
104 }
105 } else {
106 event_id = ctrl.event_id;
107 }
108 }
109
110 EventState status = events_interface.status[event_id];
111 if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) {
112 events_interface.SetEventStatus(event_id, EventState::Waiting);
113 events_interface.assigned_syncpt[event_id] = params.syncpt_id;
114 events_interface.assigned_value[event_id] = target_value;
115 if (is_async) {
116 params.value = params.syncpt_id << 4;
117 } else {
118 params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
119 }
120 params.value |= event_id;
121 events_interface.events[event_id].writable->Clear();
122 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
123 if (!is_async && ctrl.fresh_call) {
124 ctrl.must_delay = true;
125 ctrl.timeout = params.timeout;
126 ctrl.event_id = event_id;
127 return NvResult::Timeout;
128 }
129 std::memcpy(output.data(), &params, sizeof(params));
130 return NvResult::Timeout;
131 }
53 std::memcpy(output.data(), &params, sizeof(params)); 132 std::memcpy(output.data(), &params, sizeof(params));
54 return 0; 133 return NvResult::BadParameter;
55} 134}
56 135
57u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) { 136u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) {
58 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 137 IocCtrlEventRegisterParams params{};
59 // TODO(bunnei): Implement this. 138 std::memcpy(&params, input.data(), sizeof(params));
60 return 0; 139 const u32 event_id = params.user_event_id & 0x00FF;
140 LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id);
141 if (event_id >= MaxNvEvents) {
142 return NvResult::BadParameter;
143 }
144 if (events_interface.registered[event_id]) {
145 return NvResult::BadParameter;
146 }
147 events_interface.RegisterEvent(event_id);
148 return NvResult::Success;
149}
150
151u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output) {
152 IocCtrlEventUnregisterParams params{};
153 std::memcpy(&params, input.data(), sizeof(params));
154 const u32 event_id = params.user_event_id & 0x00FF;
155 LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id);
156 if (event_id >= MaxNvEvents) {
157 return NvResult::BadParameter;
158 }
159 if (!events_interface.registered[event_id]) {
160 return NvResult::BadParameter;
161 }
162 events_interface.UnregisterEvent(event_id);
163 return NvResult::Success;
164}
165
166u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) {
167 IocCtrlEventSignalParams params{};
168 std::memcpy(&params, input.data(), sizeof(params));
169 // TODO(Blinkhawk): This is normally called when an NvEvents timeout on WaitSynchronization
170 // It is believed from RE to cancel the GPU Event. However, better research is required
171 u32 event_id = params.user_event_id & 0x00FF;
172 LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id);
173 if (event_id >= MaxNvEvents) {
174 return NvResult::BadParameter;
175 }
176 if (events_interface.status[event_id] == EventState::Waiting) {
177 auto& gpu = system.GPU();
178 if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id],
179 events_interface.assigned_value[event_id])) {
180 events_interface.LiberateEvent(event_id);
181 events_interface.events[event_id].writable->Signal();
182 }
183 }
184 return NvResult::Success;
61} 185}
62 186
63} // namespace Service::Nvidia::Devices 187} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index 6d0de2212..14e6e7e57 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -8,15 +8,17 @@
8#include <vector> 8#include <vector>
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/service/nvdrv/devices/nvdevice.h" 10#include "core/hle/service/nvdrv/devices/nvdevice.h"
11#include "core/hle/service/nvdrv/nvdrv.h"
11 12
12namespace Service::Nvidia::Devices { 13namespace Service::Nvidia::Devices {
13 14
14class nvhost_ctrl final : public nvdevice { 15class nvhost_ctrl final : public nvdevice {
15public: 16public:
16 nvhost_ctrl(); 17 explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface);
17 ~nvhost_ctrl() override; 18 ~nvhost_ctrl() override;
18 19
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 20 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
21 IoctlCtrl& ctrl) override;
20 22
21private: 23private:
22 enum class IoctlCommand : u32_le { 24 enum class IoctlCommand : u32_le {
@@ -132,9 +134,16 @@ private:
132 134
133 u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); 135 u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
134 136
135 u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async); 137 u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async,
138 IoctlCtrl& ctrl);
136 139
137 u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); 140 u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output);
141
142 u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
143
144 u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output);
145
146 EventInterface& events_interface;
138}; 147};
139 148
140} // namespace Service::Nvidia::Devices 149} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index 0e28755bd..988effd90 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -12,10 +12,11 @@
12 12
13namespace Service::Nvidia::Devices { 13namespace Service::Nvidia::Devices {
14 14
15nvhost_ctrl_gpu::nvhost_ctrl_gpu() = default; 15nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {}
16nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; 16nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default;
17 17
18u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 18u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
19 IoctlCtrl& ctrl) {
19 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 20 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
20 command.raw, input.size(), output.size()); 21 command.raw, input.size(), output.size());
21 22
@@ -185,7 +186,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
185 186
186 IoctlGetGpuTime params{}; 187 IoctlGetGpuTime params{};
187 std::memcpy(&params, input.data(), input.size()); 188 std::memcpy(&params, input.data(), input.size());
188 const auto ns = Core::Timing::CyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks()); 189 const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks());
189 params.gpu_time = static_cast<u64_le>(ns.count()); 190 params.gpu_time = static_cast<u64_le>(ns.count());
190 std::memcpy(output.data(), &params, output.size()); 191 std::memcpy(output.data(), &params, output.size());
191 return 0; 192 return 0;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 240435eea..2b035ae3f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_ctrl_gpu final : public nvdevice { 14class nvhost_ctrl_gpu final : public nvdevice {
15public: 15public:
16 nvhost_ctrl_gpu(); 16 explicit nvhost_ctrl_gpu(Core::System& system);
17 ~nvhost_ctrl_gpu() override; 17 ~nvhost_ctrl_gpu() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 8ce7bc7a5..241dac881 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -13,10 +13,12 @@
13 13
14namespace Service::Nvidia::Devices { 14namespace Service::Nvidia::Devices {
15 15
16nvhost_gpu::nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} 16nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
17 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
17nvhost_gpu::~nvhost_gpu() = default; 18nvhost_gpu::~nvhost_gpu() = default;
18 19
19u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 20u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
21 IoctlCtrl& ctrl) {
20 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 22 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
21 command.raw, input.size(), output.size()); 23 command.raw, input.size(), output.size());
22 24
@@ -119,8 +121,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
119 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, 121 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
120 params.unk3); 122 params.unk3);
121 123
122 params.fence_out.id = 0; 124 auto& gpu = system.GPU();
123 params.fence_out.value = 0; 125 params.fence_out.id = assigned_syncpoints;
126 params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints);
127 assigned_syncpoints++;
124 std::memcpy(output.data(), &params, output.size()); 128 std::memcpy(output.data(), &params, output.size());
125 return 0; 129 return 0;
126} 130}
@@ -143,7 +147,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
143 IoctlSubmitGpfifo params{}; 147 IoctlSubmitGpfifo params{};
144 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); 148 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
145 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", 149 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
146 params.address, params.num_entries, params.flags); 150 params.address, params.num_entries, params.flags.raw);
147 151
148 ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + 152 ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
149 params.num_entries * sizeof(Tegra::CommandListHeader), 153 params.num_entries * sizeof(Tegra::CommandListHeader),
@@ -153,10 +157,18 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
153 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], 157 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
154 params.num_entries * sizeof(Tegra::CommandListHeader)); 158 params.num_entries * sizeof(Tegra::CommandListHeader));
155 159
156 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); 160 UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
161 UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
162
163 auto& gpu = system.GPU();
164 u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
165 if (params.flags.increment.Value()) {
166 params.fence_out.value += current_syncpoint_value;
167 } else {
168 params.fence_out.value = current_syncpoint_value;
169 }
170 gpu.PushGPUEntries(std::move(entries));
157 171
158 params.fence_out.id = 0;
159 params.fence_out.value = 0;
160 std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); 172 std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
161 return 0; 173 return 0;
162} 174}
@@ -168,16 +180,24 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
168 IoctlSubmitGpfifo params{}; 180 IoctlSubmitGpfifo params{};
169 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); 181 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
170 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", 182 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
171 params.address, params.num_entries, params.flags); 183 params.address, params.num_entries, params.flags.raw);
172 184
173 Tegra::CommandList entries(params.num_entries); 185 Tegra::CommandList entries(params.num_entries);
174 Memory::ReadBlock(params.address, entries.data(), 186 Memory::ReadBlock(params.address, entries.data(),
175 params.num_entries * sizeof(Tegra::CommandListHeader)); 187 params.num_entries * sizeof(Tegra::CommandListHeader));
176 188
177 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); 189 UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
190 UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
191
192 auto& gpu = system.GPU();
193 u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
194 if (params.flags.increment.Value()) {
195 params.fence_out.value += current_syncpoint_value;
196 } else {
197 params.fence_out.value = current_syncpoint_value;
198 }
199 gpu.PushGPUEntries(std::move(entries));
178 200
179 params.fence_out.id = 0;
180 params.fence_out.value = 0;
181 std::memcpy(output.data(), &params, output.size()); 201 std::memcpy(output.data(), &params, output.size());
182 return 0; 202 return 0;
183} 203}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 62beb5c0c..d2e8fbae9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -10,6 +10,7 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/swap.h" 11#include "common/swap.h"
12#include "core/hle/service/nvdrv/devices/nvdevice.h" 12#include "core/hle/service/nvdrv/devices/nvdevice.h"
13#include "core/hle/service/nvdrv/nvdata.h"
13 14
14namespace Service::Nvidia::Devices { 15namespace Service::Nvidia::Devices {
15 16
@@ -20,10 +21,11 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
20 21
21class nvhost_gpu final : public nvdevice { 22class nvhost_gpu final : public nvdevice {
22public: 23public:
23 explicit nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev); 24 explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
24 ~nvhost_gpu() override; 25 ~nvhost_gpu() override;
25 26
26 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 27 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
28 IoctlCtrl& ctrl) override;
27 29
28private: 30private:
29 enum class IoctlCommand : u32_le { 31 enum class IoctlCommand : u32_le {
@@ -113,11 +115,7 @@ private:
113 static_assert(sizeof(IoctlGetErrorNotification) == 16, 115 static_assert(sizeof(IoctlGetErrorNotification) == 16,
114 "IoctlGetErrorNotification is incorrect size"); 116 "IoctlGetErrorNotification is incorrect size");
115 117
116 struct IoctlFence { 118 static_assert(sizeof(Fence) == 8, "Fence is incorrect size");
117 u32_le id;
118 u32_le value;
119 };
120 static_assert(sizeof(IoctlFence) == 8, "IoctlFence is incorrect size");
121 119
122 struct IoctlAllocGpfifoEx { 120 struct IoctlAllocGpfifoEx {
123 u32_le num_entries; 121 u32_le num_entries;
@@ -132,13 +130,13 @@ private:
132 static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size"); 130 static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size");
133 131
134 struct IoctlAllocGpfifoEx2 { 132 struct IoctlAllocGpfifoEx2 {
135 u32_le num_entries; // in 133 u32_le num_entries; // in
136 u32_le flags; // in 134 u32_le flags; // in
137 u32_le unk0; // in (1 works) 135 u32_le unk0; // in (1 works)
138 IoctlFence fence_out; // out 136 Fence fence_out; // out
139 u32_le unk1; // in 137 u32_le unk1; // in
140 u32_le unk2; // in 138 u32_le unk2; // in
141 u32_le unk3; // in 139 u32_le unk3; // in
142 }; 140 };
143 static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size"); 141 static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size");
144 142
@@ -153,10 +151,16 @@ private:
153 struct IoctlSubmitGpfifo { 151 struct IoctlSubmitGpfifo {
154 u64_le address; // pointer to gpfifo entry structs 152 u64_le address; // pointer to gpfifo entry structs
155 u32_le num_entries; // number of fence objects being submitted 153 u32_le num_entries; // number of fence objects being submitted
156 u32_le flags; 154 union {
157 IoctlFence fence_out; // returned new fence object for others to wait on 155 u32_le raw;
158 }; 156 BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
159 static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(IoctlFence), 157 BitField<1, 1, u32_le> add_increment; // append an increment to the list
158 BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
159 BitField<8, 1, u32_le> increment; // increment the returned fence
160 } flags;
161 Fence fence_out; // returned new fence object for others to wait on
162 };
163 static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
160 "IoctlSubmitGpfifo is incorrect size"); 164 "IoctlSubmitGpfifo is incorrect size");
161 165
162 struct IoctlGetWaitbase { 166 struct IoctlGetWaitbase {
@@ -184,6 +188,7 @@ private:
184 u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); 188 u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);
185 189
186 std::shared_ptr<nvmap> nvmap_dev; 190 std::shared_ptr<nvmap> nvmap_dev;
191 u32 assigned_syncpoints{};
187}; 192};
188 193
189} // namespace Service::Nvidia::Devices 194} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index f5e8ea7c3..f572ad30f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -10,10 +10,11 @@
10 10
11namespace Service::Nvidia::Devices { 11namespace Service::Nvidia::Devices {
12 12
13nvhost_nvdec::nvhost_nvdec() = default; 13nvhost_nvdec::nvhost_nvdec(Core::System& system) : nvdevice(system) {}
14nvhost_nvdec::~nvhost_nvdec() = default; 14nvhost_nvdec::~nvhost_nvdec() = default;
15 15
16u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 16u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
17 IoctlCtrl& ctrl) {
17 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
18 command.raw, input.size(), output.size()); 19 command.raw, input.size(), output.size());
19 20
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 0e7b284f8..2710f0511 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_nvdec final : public nvdevice { 14class nvhost_nvdec final : public nvdevice {
15public: 15public:
16 nvhost_nvdec(); 16 explicit nvhost_nvdec(Core::System& system);
17 ~nvhost_nvdec() override; 17 ~nvhost_nvdec() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
index 3e0951ab0..38282956f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
@@ -10,10 +10,11 @@
10 10
11namespace Service::Nvidia::Devices { 11namespace Service::Nvidia::Devices {
12 12
13nvhost_nvjpg::nvhost_nvjpg() = default; 13nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {}
14nvhost_nvjpg::~nvhost_nvjpg() = default; 14nvhost_nvjpg::~nvhost_nvjpg() = default;
15 15
16u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 16u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
17 IoctlCtrl& ctrl) {
17 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
18 command.raw, input.size(), output.size()); 19 command.raw, input.size(), output.size());
19 20
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
index 89fd5e95e..379766693 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_nvjpg final : public nvdevice { 14class nvhost_nvjpg final : public nvdevice {
15public: 15public:
16 nvhost_nvjpg(); 16 explicit nvhost_nvjpg(Core::System& system);
17 ~nvhost_nvjpg() override; 17 ~nvhost_nvjpg() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index d544f0f31..70e8091db 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -10,10 +10,11 @@
10 10
11namespace Service::Nvidia::Devices { 11namespace Service::Nvidia::Devices {
12 12
13nvhost_vic::nvhost_vic() = default; 13nvhost_vic::nvhost_vic(Core::System& system) : nvdevice(system) {}
14nvhost_vic::~nvhost_vic() = default; 14nvhost_vic::~nvhost_vic() = default;
15 15
16u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 16u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
17 IoctlCtrl& ctrl) {
17 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
18 command.raw, input.size(), output.size()); 19 command.raw, input.size(), output.size());
19 20
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index fc24c3f9c..7d111977e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_vic final : public nvdevice { 14class nvhost_vic final : public nvdevice {
15public: 15public:
16 nvhost_vic(); 16 explicit nvhost_vic(Core::System& system);
17 ~nvhost_vic() override; 17 ~nvhost_vic() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 1ec796fc6..223b496b7 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -18,7 +18,7 @@ enum {
18}; 18};
19} 19}
20 20
21nvmap::nvmap() = default; 21nvmap::nvmap(Core::System& system) : nvdevice(system) {}
22nvmap::~nvmap() = default; 22nvmap::~nvmap() = default;
23 23
24VAddr nvmap::GetObjectAddress(u32 handle) const { 24VAddr nvmap::GetObjectAddress(u32 handle) const {
@@ -28,7 +28,8 @@ VAddr nvmap::GetObjectAddress(u32 handle) const {
28 return object->addr; 28 return object->addr;
29} 29}
30 30
31u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 31u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
32 IoctlCtrl& ctrl) {
32 switch (static_cast<IoctlCommand>(command.raw)) { 33 switch (static_cast<IoctlCommand>(command.raw)) {
33 case IoctlCommand::Create: 34 case IoctlCommand::Create:
34 return IocCreate(input, output); 35 return IocCreate(input, output);
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 396230c19..bf4a101c2 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -16,13 +16,14 @@ namespace Service::Nvidia::Devices {
16 16
17class nvmap final : public nvdevice { 17class nvmap final : public nvdevice {
18public: 18public:
19 nvmap(); 19 explicit nvmap(Core::System& system);
20 ~nvmap() override; 20 ~nvmap() override;
21 21
22 /// Returns the allocated address of an nvmap object given its handle. 22 /// Returns the allocated address of an nvmap object given its handle.
23 VAddr GetObjectAddress(u32 handle) const; 23 VAddr GetObjectAddress(u32 handle) const;
24 24
25 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 25 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
26 IoctlCtrl& ctrl) override;
26 27
27 /// Represents an nvmap object. 28 /// Represents an nvmap object.
28 struct Object { 29 struct Object {
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index b60fc748b..d5be64ed2 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -8,12 +8,18 @@
8#include "core/hle/ipc_helpers.h" 8#include "core/hle/ipc_helpers.h"
9#include "core/hle/kernel/kernel.h" 9#include "core/hle/kernel/kernel.h"
10#include "core/hle/kernel/readable_event.h" 10#include "core/hle/kernel/readable_event.h"
11#include "core/hle/kernel/thread.h"
11#include "core/hle/kernel/writable_event.h" 12#include "core/hle/kernel/writable_event.h"
12#include "core/hle/service/nvdrv/interface.h" 13#include "core/hle/service/nvdrv/interface.h"
14#include "core/hle/service/nvdrv/nvdata.h"
13#include "core/hle/service/nvdrv/nvdrv.h" 15#include "core/hle/service/nvdrv/nvdrv.h"
14 16
15namespace Service::Nvidia { 17namespace Service::Nvidia {
16 18
19void NVDRV::SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
20 nvdrv->SignalSyncpt(syncpoint_id, value);
21}
22
17void NVDRV::Open(Kernel::HLERequestContext& ctx) { 23void NVDRV::Open(Kernel::HLERequestContext& ctx) {
18 LOG_DEBUG(Service_NVDRV, "called"); 24 LOG_DEBUG(Service_NVDRV, "called");
19 25
@@ -36,11 +42,31 @@ void NVDRV::Ioctl(Kernel::HLERequestContext& ctx) {
36 42
37 std::vector<u8> output(ctx.GetWriteBufferSize()); 43 std::vector<u8> output(ctx.GetWriteBufferSize());
38 44
45 IoctlCtrl ctrl{};
46
47 u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output, ctrl);
48
49 if (ctrl.must_delay) {
50 ctrl.fresh_call = false;
51 ctx.SleepClientThread(
52 "NVServices::DelayedResponse", ctrl.timeout,
53 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
54 Kernel::ThreadWakeupReason reason) {
55 IoctlCtrl ctrl2{ctrl};
56 std::vector<u8> output2 = output;
57 u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output2, ctrl2);
58 ctx.WriteBuffer(output2);
59 IPC::ResponseBuilder rb{ctx, 3};
60 rb.Push(RESULT_SUCCESS);
61 rb.Push(result);
62 },
63 nvdrv->GetEventWriteable(ctrl.event_id));
64 } else {
65 ctx.WriteBuffer(output);
66 }
39 IPC::ResponseBuilder rb{ctx, 3}; 67 IPC::ResponseBuilder rb{ctx, 3};
40 rb.Push(RESULT_SUCCESS); 68 rb.Push(RESULT_SUCCESS);
41 rb.Push(nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output)); 69 rb.Push(result);
42
43 ctx.WriteBuffer(output);
44} 70}
45 71
46void NVDRV::Close(Kernel::HLERequestContext& ctx) { 72void NVDRV::Close(Kernel::HLERequestContext& ctx) {
@@ -66,13 +92,19 @@ void NVDRV::Initialize(Kernel::HLERequestContext& ctx) {
66void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { 92void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
67 IPC::RequestParser rp{ctx}; 93 IPC::RequestParser rp{ctx};
68 u32 fd = rp.Pop<u32>(); 94 u32 fd = rp.Pop<u32>();
69 u32 event_id = rp.Pop<u32>(); 95 // TODO(Blinkhawk): Figure the meaning of the flag at bit 16
96 u32 event_id = rp.Pop<u32>() & 0x000000FF;
70 LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id); 97 LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id);
71 98
72 IPC::ResponseBuilder rb{ctx, 3, 1}; 99 IPC::ResponseBuilder rb{ctx, 3, 1};
73 rb.Push(RESULT_SUCCESS); 100 rb.Push(RESULT_SUCCESS);
74 rb.PushCopyObjects(query_event.readable); 101 if (event_id < MaxNvEvents) {
75 rb.Push<u32>(0); 102 rb.PushCopyObjects(nvdrv->GetEvent(event_id));
103 rb.Push<u32>(NvResult::Success);
104 } else {
105 rb.Push<u32>(0);
106 rb.Push<u32>(NvResult::BadParameter);
107 }
76} 108}
77 109
78void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) { 110void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) {
@@ -127,10 +159,6 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
127 {13, &NVDRV::FinishInitialize, "FinishInitialize"}, 159 {13, &NVDRV::FinishInitialize, "FinishInitialize"},
128 }; 160 };
129 RegisterHandlers(functions); 161 RegisterHandlers(functions);
130
131 auto& kernel = Core::System::GetInstance().Kernel();
132 query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
133 "NVDRV::query_event");
134} 162}
135 163
136NVDRV::~NVDRV() = default; 164NVDRV::~NVDRV() = default;
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h
index 5b4889910..10a0ecd52 100644
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/interface.h
@@ -19,6 +19,8 @@ public:
19 NVDRV(std::shared_ptr<Module> nvdrv, const char* name); 19 NVDRV(std::shared_ptr<Module> nvdrv, const char* name);
20 ~NVDRV() override; 20 ~NVDRV() override;
21 21
22 void SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value);
23
22private: 24private:
23 void Open(Kernel::HLERequestContext& ctx); 25 void Open(Kernel::HLERequestContext& ctx);
24 void Ioctl(Kernel::HLERequestContext& ctx); 26 void Ioctl(Kernel::HLERequestContext& ctx);
@@ -33,8 +35,6 @@ private:
33 std::shared_ptr<Module> nvdrv; 35 std::shared_ptr<Module> nvdrv;
34 36
35 u64 pid{}; 37 u64 pid{};
36
37 Kernel::EventPair query_event;
38}; 38};
39 39
40} // namespace Service::Nvidia 40} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h
new file mode 100644
index 000000000..ac03cbc23
--- /dev/null
+++ b/src/core/hle/service/nvdrv/nvdata.h
@@ -0,0 +1,48 @@
1#pragma once
2
3#include <array>
4#include "common/common_types.h"
5
6namespace Service::Nvidia {
7
8constexpr u32 MaxSyncPoints = 192;
9constexpr u32 MaxNvEvents = 64;
10
11struct Fence {
12 s32 id;
13 u32 value;
14};
15
16static_assert(sizeof(Fence) == 8, "Fence has wrong size");
17
18struct MultiFence {
19 u32 num_fences;
20 std::array<Fence, 4> fences;
21};
22
23enum NvResult : u32 {
24 Success = 0,
25 BadParameter = 4,
26 Timeout = 5,
27 ResourceError = 15,
28};
29
30enum class EventState {
31 Free = 0,
32 Registered = 1,
33 Waiting = 2,
34 Busy = 3,
35};
36
37struct IoctlCtrl {
38 // First call done to the servioce for services that call itself again after a call.
39 bool fresh_call{true};
40 // Tells the Ioctl Wrapper that it must delay the IPC response and send the thread to sleep
41 bool must_delay{};
42 // Timeout for the delay
43 s64 timeout{};
44 // NV Event Id
45 s32 event_id{-1};
46};
47
48} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 6e4b8f2c6..2011a226a 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -4,7 +4,10 @@
4 4
5#include <utility> 5#include <utility>
6 6
7#include <fmt/format.h>
7#include "core/hle/ipc_helpers.h" 8#include "core/hle/ipc_helpers.h"
9#include "core/hle/kernel/readable_event.h"
10#include "core/hle/kernel/writable_event.h"
8#include "core/hle/service/nvdrv/devices/nvdevice.h" 11#include "core/hle/service/nvdrv/devices/nvdevice.h"
9#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" 12#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
10#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" 13#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
@@ -22,8 +25,9 @@
22 25
23namespace Service::Nvidia { 26namespace Service::Nvidia {
24 27
25void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger) { 28void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger,
26 auto module_ = std::make_shared<Module>(); 29 Core::System& system) {
30 auto module_ = std::make_shared<Module>(system);
27 std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager); 31 std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager);
28 std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager); 32 std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager);
29 std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager); 33 std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager);
@@ -32,17 +36,25 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
32 nvflinger.SetNVDrvInstance(module_); 36 nvflinger.SetNVDrvInstance(module_);
33} 37}
34 38
35Module::Module() { 39Module::Module(Core::System& system) {
36 auto nvmap_dev = std::make_shared<Devices::nvmap>(); 40 auto& kernel = system.Kernel();
37 devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(nvmap_dev); 41 for (u32 i = 0; i < MaxNvEvents; i++) {
38 devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(nvmap_dev); 42 std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
39 devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(); 43 events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(
44 kernel, Kernel::ResetType::Automatic, event_label);
45 events_interface.status[i] = EventState::Free;
46 events_interface.registered[i] = false;
47 }
48 auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
49 devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
50 devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev);
51 devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
40 devices["/dev/nvmap"] = nvmap_dev; 52 devices["/dev/nvmap"] = nvmap_dev;
41 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev); 53 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
42 devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(); 54 devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface);
43 devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(); 55 devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system);
44 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(); 56 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
45 devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(); 57 devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system);
46} 58}
47 59
48Module::~Module() = default; 60Module::~Module() = default;
@@ -59,12 +71,13 @@ u32 Module::Open(const std::string& device_name) {
59 return fd; 71 return fd;
60} 72}
61 73
62u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output) { 74u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output,
75 IoctlCtrl& ctrl) {
63 auto itr = open_files.find(fd); 76 auto itr = open_files.find(fd);
64 ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device"); 77 ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device");
65 78
66 auto& device = itr->second; 79 auto& device = itr->second;
67 return device->ioctl({command}, input, output); 80 return device->ioctl({command}, input, output, ctrl);
68} 81}
69 82
70ResultCode Module::Close(u32 fd) { 83ResultCode Module::Close(u32 fd) {
@@ -77,4 +90,22 @@ ResultCode Module::Close(u32 fd) {
77 return RESULT_SUCCESS; 90 return RESULT_SUCCESS;
78} 91}
79 92
93void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
94 for (u32 i = 0; i < MaxNvEvents; i++) {
95 if (events_interface.assigned_syncpt[i] == syncpoint_id &&
96 events_interface.assigned_value[i] == value) {
97 events_interface.LiberateEvent(i);
98 events_interface.events[i].writable->Signal();
99 }
100 }
101}
102
103Kernel::SharedPtr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
104 return events_interface.events[event_id].readable;
105}
106
107Kernel::SharedPtr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
108 return events_interface.events[event_id].writable;
109}
110
80} // namespace Service::Nvidia 111} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 53564f696..a339ab672 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -8,8 +8,14 @@
8#include <unordered_map> 8#include <unordered_map>
9#include <vector> 9#include <vector>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/hle/kernel/writable_event.h"
12#include "core/hle/service/nvdrv/nvdata.h"
11#include "core/hle/service/service.h" 13#include "core/hle/service/service.h"
12 14
15namespace Core {
16class System;
17}
18
13namespace Service::NVFlinger { 19namespace Service::NVFlinger {
14class NVFlinger; 20class NVFlinger;
15} 21}
@@ -20,16 +26,72 @@ namespace Devices {
20class nvdevice; 26class nvdevice;
21} 27}
22 28
23struct IoctlFence { 29struct EventInterface {
24 u32 id; 30 // Mask representing currently busy events
25 u32 value; 31 u64 events_mask{};
32 // Each kernel event associated to an NV event
33 std::array<Kernel::EventPair, MaxNvEvents> events;
34 // The status of the current NVEvent
35 std::array<EventState, MaxNvEvents> status{};
36 // Tells if an NVEvent is registered or not
37 std::array<bool, MaxNvEvents> registered{};
38 // When an NVEvent is waiting on GPU interrupt, this is the sync_point
39 // associated with it.
40 std::array<u32, MaxNvEvents> assigned_syncpt{};
41 // This is the value of the GPU interrupt for which the NVEvent is waiting
42 // for.
43 std::array<u32, MaxNvEvents> assigned_value{};
44 // Constant to denote an unasigned syncpoint.
45 static constexpr u32 unassigned_syncpt = 0xFFFFFFFF;
46 std::optional<u32> GetFreeEvent() const {
47 u64 mask = events_mask;
48 for (u32 i = 0; i < MaxNvEvents; i++) {
49 const bool is_free = (mask & 0x1) == 0;
50 if (is_free) {
51 if (status[i] == EventState::Registered || status[i] == EventState::Free) {
52 return {i};
53 }
54 }
55 mask = mask >> 1;
56 }
57 return {};
58 }
59 void SetEventStatus(const u32 event_id, EventState new_status) {
60 EventState old_status = status[event_id];
61 if (old_status == new_status) {
62 return;
63 }
64 status[event_id] = new_status;
65 if (new_status == EventState::Registered) {
66 registered[event_id] = true;
67 }
68 if (new_status == EventState::Waiting || new_status == EventState::Busy) {
69 events_mask |= (1ULL << event_id);
70 }
71 }
72 void RegisterEvent(const u32 event_id) {
73 registered[event_id] = true;
74 if (status[event_id] == EventState::Free) {
75 status[event_id] = EventState::Registered;
76 }
77 }
78 void UnregisterEvent(const u32 event_id) {
79 registered[event_id] = false;
80 if (status[event_id] == EventState::Registered) {
81 status[event_id] = EventState::Free;
82 }
83 }
84 void LiberateEvent(const u32 event_id) {
85 status[event_id] = registered[event_id] ? EventState::Registered : EventState::Free;
86 events_mask &= ~(1ULL << event_id);
87 assigned_syncpt[event_id] = unassigned_syncpt;
88 assigned_value[event_id] = 0;
89 }
26}; 90};
27 91
28static_assert(sizeof(IoctlFence) == 8, "IoctlFence has wrong size");
29
30class Module final { 92class Module final {
31public: 93public:
32 Module(); 94 Module(Core::System& system);
33 ~Module(); 95 ~Module();
34 96
35 /// Returns a pointer to one of the available devices, identified by its name. 97 /// Returns a pointer to one of the available devices, identified by its name.
@@ -44,10 +106,17 @@ public:
44 /// Opens a device node and returns a file descriptor to it. 106 /// Opens a device node and returns a file descriptor to it.
45 u32 Open(const std::string& device_name); 107 u32 Open(const std::string& device_name);
46 /// Sends an ioctl command to the specified file descriptor. 108 /// Sends an ioctl command to the specified file descriptor.
47 u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output); 109 u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output,
110 IoctlCtrl& ctrl);
48 /// Closes a device file descriptor and returns operation success. 111 /// Closes a device file descriptor and returns operation success.
49 ResultCode Close(u32 fd); 112 ResultCode Close(u32 fd);
50 113
114 void SignalSyncpt(const u32 syncpoint_id, const u32 value);
115
116 Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent(u32 event_id) const;
117
118 Kernel::SharedPtr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;
119
51private: 120private:
52 /// Id to use for the next open file descriptor. 121 /// Id to use for the next open file descriptor.
53 u32 next_fd = 1; 122 u32 next_fd = 1;
@@ -57,9 +126,12 @@ private:
57 126
58 /// Mapping of device node names to their implementation. 127 /// Mapping of device node names to their implementation.
59 std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; 128 std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices;
129
130 EventInterface events_interface;
60}; 131};
61 132
62/// Registers all NVDRV services with the specified service manager. 133/// Registers all NVDRV services with the specified service manager.
63void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger); 134void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger,
135 Core::System& system);
64 136
65} // namespace Service::Nvidia 137} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 5731e815f..e1a07d3ee 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -34,7 +34,8 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
34 buffer_wait_event.writable->Signal(); 34 buffer_wait_event.writable->Signal();
35} 35}
36 36
37std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) { 37std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
38 u32 height) {
38 auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { 39 auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
39 // Only consider free buffers. Buffers become free once again after they've been Acquired 40 // Only consider free buffers. Buffers become free once again after they've been Acquired
40 // and Released by the compositor, see the NVFlinger::Compose method. 41 // and Released by the compositor, see the NVFlinger::Compose method.
@@ -51,7 +52,7 @@ std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) {
51 } 52 }
52 53
53 itr->status = Buffer::Status::Dequeued; 54 itr->status = Buffer::Status::Dequeued;
54 return itr->slot; 55 return {{itr->slot, &itr->multi_fence}};
55} 56}
56 57
57const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { 58const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
@@ -63,7 +64,8 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
63} 64}
64 65
65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, 66void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
66 const Common::Rectangle<int>& crop_rect) { 67 const Common::Rectangle<int>& crop_rect, u32 swap_interval,
68 Service::Nvidia::MultiFence& multi_fence) {
67 auto itr = std::find_if(queue.begin(), queue.end(), 69 auto itr = std::find_if(queue.begin(), queue.end(),
68 [&](const Buffer& buffer) { return buffer.slot == slot; }); 70 [&](const Buffer& buffer) { return buffer.slot == slot; });
69 ASSERT(itr != queue.end()); 71 ASSERT(itr != queue.end());
@@ -71,12 +73,21 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
71 itr->status = Buffer::Status::Queued; 73 itr->status = Buffer::Status::Queued;
72 itr->transform = transform; 74 itr->transform = transform;
73 itr->crop_rect = crop_rect; 75 itr->crop_rect = crop_rect;
76 itr->swap_interval = swap_interval;
77 itr->multi_fence = multi_fence;
78 queue_sequence.push_back(slot);
74} 79}
75 80
76std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { 81std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
77 auto itr = std::find_if(queue.begin(), queue.end(), [](const Buffer& buffer) { 82 auto itr = queue.end();
78 return buffer.status == Buffer::Status::Queued; 83 // Iterate to find a queued buffer matching the requested slot.
79 }); 84 while (itr == queue.end() && !queue_sequence.empty()) {
85 u32 slot = queue_sequence.front();
86 itr = std::find_if(queue.begin(), queue.end(), [&slot](const Buffer& buffer) {
87 return buffer.status == Buffer::Status::Queued && buffer.slot == slot;
88 });
89 queue_sequence.pop_front();
90 }
80 if (itr == queue.end()) 91 if (itr == queue.end())
81 return {}; 92 return {};
82 itr->status = Buffer::Status::Acquired; 93 itr->status = Buffer::Status::Acquired;
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index e1ccb6171..356bedb81 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <list>
7#include <optional> 8#include <optional>
8#include <vector> 9#include <vector>
9 10
@@ -12,6 +13,7 @@
12#include "common/swap.h" 13#include "common/swap.h"
13#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/writable_event.h" 15#include "core/hle/kernel/writable_event.h"
16#include "core/hle/service/nvdrv/nvdata.h"
15 17
16namespace Service::NVFlinger { 18namespace Service::NVFlinger {
17 19
@@ -68,13 +70,17 @@ public:
68 IGBPBuffer igbp_buffer; 70 IGBPBuffer igbp_buffer;
69 BufferTransformFlags transform; 71 BufferTransformFlags transform;
70 Common::Rectangle<int> crop_rect; 72 Common::Rectangle<int> crop_rect;
73 u32 swap_interval;
74 Service::Nvidia::MultiFence multi_fence;
71 }; 75 };
72 76
73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); 77 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
74 std::optional<u32> DequeueBuffer(u32 width, u32 height); 78 std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> DequeueBuffer(u32 width,
79 u32 height);
75 const IGBPBuffer& RequestBuffer(u32 slot) const; 80 const IGBPBuffer& RequestBuffer(u32 slot) const;
76 void QueueBuffer(u32 slot, BufferTransformFlags transform, 81 void QueueBuffer(u32 slot, BufferTransformFlags transform,
77 const Common::Rectangle<int>& crop_rect); 82 const Common::Rectangle<int>& crop_rect, u32 swap_interval,
83 Service::Nvidia::MultiFence& multi_fence);
78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 84 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
79 void ReleaseBuffer(u32 slot); 85 void ReleaseBuffer(u32 slot);
80 u32 Query(QueryType type); 86 u32 Query(QueryType type);
@@ -92,6 +98,7 @@ private:
92 u64 layer_id; 98 u64 layer_id;
93 99
94 std::vector<Buffer> queue; 100 std::vector<Buffer> queue;
101 std::list<u32> queue_sequence;
95 Kernel::EventPair buffer_wait_event; 102 Kernel::EventPair buffer_wait_event;
96}; 103};
97 104
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 3c5c53e24..f9db79370 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -37,15 +37,14 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t
37 displays.emplace_back(4, "Null"); 37 displays.emplace_back(4, "Null");
38 38
39 // Schedule the screen composition events 39 // Schedule the screen composition events
40 const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : frame_ticks; 40 composition_event = core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata,
41 41 s64 cycles_late) {
42 composition_event = core_timing.RegisterEvent( 42 Compose();
43 "ScreenComposition", [this, ticks](u64 userdata, s64 cycles_late) { 43 const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks();
44 Compose(); 44 this->core_timing.ScheduleEvent(std::max<s64>(0LL, ticks - cycles_late), composition_event);
45 this->core_timing.ScheduleEvent(ticks - cycles_late, composition_event); 45 });
46 }); 46
47 47 core_timing.ScheduleEvent(frame_ticks, composition_event);
48 core_timing.ScheduleEvent(ticks, composition_event);
49} 48}
50 49
51NVFlinger::~NVFlinger() { 50NVFlinger::~NVFlinger() {
@@ -206,8 +205,14 @@ void NVFlinger::Compose() {
206 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, 205 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
207 buffer->get().transform, buffer->get().crop_rect); 206 buffer->get().transform, buffer->get().crop_rect);
208 207
208 swap_interval = buffer->get().swap_interval;
209 buffer_queue.ReleaseBuffer(buffer->get().slot); 209 buffer_queue.ReleaseBuffer(buffer->get().slot);
210 } 210 }
211} 211}
212 212
213s64 NVFlinger::GetNextTicks() const {
214 constexpr s64 max_hertz = 120LL;
215 return (Core::Timing::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz;
216}
217
213} // namespace Service::NVFlinger 218} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index c0a83fffb..988be8726 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -74,6 +74,8 @@ public:
74 /// finished. 74 /// finished.
75 void Compose(); 75 void Compose();
76 76
77 s64 GetNextTicks() const;
78
77private: 79private:
78 /// Finds the display identified by the specified ID. 80 /// Finds the display identified by the specified ID.
79 VI::Display* FindDisplay(u64 display_id); 81 VI::Display* FindDisplay(u64 display_id);
@@ -98,6 +100,8 @@ private:
98 /// layers. 100 /// layers.
99 u32 next_buffer_queue_id = 1; 101 u32 next_buffer_queue_id = 1;
100 102
103 u32 swap_interval = 1;
104
101 /// Event that handles screen composition. 105 /// Event that handles screen composition.
102 Core::Timing::EventType* composition_event; 106 Core::Timing::EventType* composition_event;
103 107
diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp
index ebcc41a43..fe6b5f798 100644
--- a/src/core/hle/service/pm/pm.cpp
+++ b/src/core/hle/service/pm/pm.cpp
@@ -3,11 +3,44 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/hle/ipc_helpers.h" 5#include "core/hle/ipc_helpers.h"
6#include "core/hle/kernel/kernel.h"
7#include "core/hle/kernel/process.h"
6#include "core/hle/service/pm/pm.h" 8#include "core/hle/service/pm/pm.h"
7#include "core/hle/service/service.h" 9#include "core/hle/service/service.h"
8 10
9namespace Service::PM { 11namespace Service::PM {
10 12
13namespace {
14
15constexpr ResultCode ERROR_PROCESS_NOT_FOUND{ErrorModule::PM, 1};
16
17constexpr u64 NO_PROCESS_FOUND_PID{0};
18
19std::optional<Kernel::SharedPtr<Kernel::Process>> SearchProcessList(
20 const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list,
21 std::function<bool(const Kernel::SharedPtr<Kernel::Process>&)> predicate) {
22 const auto iter = std::find_if(process_list.begin(), process_list.end(), predicate);
23
24 if (iter == process_list.end()) {
25 return std::nullopt;
26 }
27
28 return *iter;
29}
30
31void GetApplicationPidGeneric(Kernel::HLERequestContext& ctx,
32 const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list) {
33 const auto process = SearchProcessList(process_list, [](const auto& process) {
34 return process->GetProcessID() == Kernel::Process::ProcessIDMin;
35 });
36
37 IPC::ResponseBuilder rb{ctx, 4};
38 rb.Push(RESULT_SUCCESS);
39 rb.Push(process.has_value() ? (*process)->GetProcessID() : NO_PROCESS_FOUND_PID);
40}
41
42} // Anonymous namespace
43
11class BootMode final : public ServiceFramework<BootMode> { 44class BootMode final : public ServiceFramework<BootMode> {
12public: 45public:
13 explicit BootMode() : ServiceFramework{"pm:bm"} { 46 explicit BootMode() : ServiceFramework{"pm:bm"} {
@@ -41,14 +74,15 @@ private:
41 74
42class DebugMonitor final : public ServiceFramework<DebugMonitor> { 75class DebugMonitor final : public ServiceFramework<DebugMonitor> {
43public: 76public:
44 explicit DebugMonitor() : ServiceFramework{"pm:dmnt"} { 77 explicit DebugMonitor(const Kernel::KernelCore& kernel)
78 : ServiceFramework{"pm:dmnt"}, kernel(kernel) {
45 // clang-format off 79 // clang-format off
46 static const FunctionInfo functions[] = { 80 static const FunctionInfo functions[] = {
47 {0, nullptr, "GetDebugProcesses"}, 81 {0, nullptr, "GetDebugProcesses"},
48 {1, nullptr, "StartDebugProcess"}, 82 {1, nullptr, "StartDebugProcess"},
49 {2, nullptr, "GetTitlePid"}, 83 {2, &DebugMonitor::GetTitlePid, "GetTitlePid"},
50 {3, nullptr, "EnableDebugForTitleId"}, 84 {3, nullptr, "EnableDebugForTitleId"},
51 {4, nullptr, "GetApplicationPid"}, 85 {4, &DebugMonitor::GetApplicationPid, "GetApplicationPid"},
52 {5, nullptr, "EnableDebugForApplication"}, 86 {5, nullptr, "EnableDebugForApplication"},
53 {6, nullptr, "DisableDebug"}, 87 {6, nullptr, "DisableDebug"},
54 }; 88 };
@@ -56,21 +90,77 @@ public:
56 90
57 RegisterHandlers(functions); 91 RegisterHandlers(functions);
58 } 92 }
93
94private:
95 void GetTitlePid(Kernel::HLERequestContext& ctx) {
96 IPC::RequestParser rp{ctx};
97 const auto title_id = rp.PopRaw<u64>();
98
99 LOG_DEBUG(Service_PM, "called, title_id={:016X}", title_id);
100
101 const auto process =
102 SearchProcessList(kernel.GetProcessList(), [title_id](const auto& process) {
103 return process->GetTitleID() == title_id;
104 });
105
106 if (!process.has_value()) {
107 IPC::ResponseBuilder rb{ctx, 2};
108 rb.Push(ERROR_PROCESS_NOT_FOUND);
109 return;
110 }
111
112 IPC::ResponseBuilder rb{ctx, 4};
113 rb.Push(RESULT_SUCCESS);
114 rb.Push((*process)->GetProcessID());
115 }
116
117 void GetApplicationPid(Kernel::HLERequestContext& ctx) {
118 LOG_DEBUG(Service_PM, "called");
119 GetApplicationPidGeneric(ctx, kernel.GetProcessList());
120 }
121
122 const Kernel::KernelCore& kernel;
59}; 123};
60 124
61class Info final : public ServiceFramework<Info> { 125class Info final : public ServiceFramework<Info> {
62public: 126public:
63 explicit Info() : ServiceFramework{"pm:info"} { 127 explicit Info(const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list)
128 : ServiceFramework{"pm:info"}, process_list(process_list) {
64 static const FunctionInfo functions[] = { 129 static const FunctionInfo functions[] = {
65 {0, nullptr, "GetTitleId"}, 130 {0, &Info::GetTitleId, "GetTitleId"},
66 }; 131 };
67 RegisterHandlers(functions); 132 RegisterHandlers(functions);
68 } 133 }
134
135private:
136 void GetTitleId(Kernel::HLERequestContext& ctx) {
137 IPC::RequestParser rp{ctx};
138 const auto process_id = rp.PopRaw<u64>();
139
140 LOG_DEBUG(Service_PM, "called, process_id={:016X}", process_id);
141
142 const auto process = SearchProcessList(process_list, [process_id](const auto& process) {
143 return process->GetProcessID() == process_id;
144 });
145
146 if (!process.has_value()) {
147 IPC::ResponseBuilder rb{ctx, 2};
148 rb.Push(ERROR_PROCESS_NOT_FOUND);
149 return;
150 }
151
152 IPC::ResponseBuilder rb{ctx, 4};
153 rb.Push(RESULT_SUCCESS);
154 rb.Push((*process)->GetTitleID());
155 }
156
157 const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list;
69}; 158};
70 159
71class Shell final : public ServiceFramework<Shell> { 160class Shell final : public ServiceFramework<Shell> {
72public: 161public:
73 explicit Shell() : ServiceFramework{"pm:shell"} { 162 explicit Shell(const Kernel::KernelCore& kernel)
163 : ServiceFramework{"pm:shell"}, kernel(kernel) {
74 // clang-format off 164 // clang-format off
75 static const FunctionInfo functions[] = { 165 static const FunctionInfo functions[] = {
76 {0, nullptr, "LaunchProcess"}, 166 {0, nullptr, "LaunchProcess"},
@@ -79,21 +169,31 @@ public:
79 {3, nullptr, "GetProcessEventWaiter"}, 169 {3, nullptr, "GetProcessEventWaiter"},
80 {4, nullptr, "GetProcessEventType"}, 170 {4, nullptr, "GetProcessEventType"},
81 {5, nullptr, "NotifyBootFinished"}, 171 {5, nullptr, "NotifyBootFinished"},
82 {6, nullptr, "GetApplicationPid"}, 172 {6, &Shell::GetApplicationPid, "GetApplicationPid"},
83 {7, nullptr, "BoostSystemMemoryResourceLimit"}, 173 {7, nullptr, "BoostSystemMemoryResourceLimit"},
84 {8, nullptr, "EnableAdditionalSystemThreads"}, 174 {8, nullptr, "EnableAdditionalSystemThreads"},
175 {9, nullptr, "GetUnimplementedEventHandle"},
85 }; 176 };
86 // clang-format on 177 // clang-format on
87 178
88 RegisterHandlers(functions); 179 RegisterHandlers(functions);
89 } 180 }
181
182private:
183 void GetApplicationPid(Kernel::HLERequestContext& ctx) {
184 LOG_DEBUG(Service_PM, "called");
185 GetApplicationPidGeneric(ctx, kernel.GetProcessList());
186 }
187
188 const Kernel::KernelCore& kernel;
90}; 189};
91 190
92void InstallInterfaces(SM::ServiceManager& sm) { 191void InstallInterfaces(Core::System& system) {
93 std::make_shared<BootMode>()->InstallAsService(sm); 192 std::make_shared<BootMode>()->InstallAsService(system.ServiceManager());
94 std::make_shared<DebugMonitor>()->InstallAsService(sm); 193 std::make_shared<DebugMonitor>(system.Kernel())->InstallAsService(system.ServiceManager());
95 std::make_shared<Info>()->InstallAsService(sm); 194 std::make_shared<Info>(system.Kernel().GetProcessList())
96 std::make_shared<Shell>()->InstallAsService(sm); 195 ->InstallAsService(system.ServiceManager());
196 std::make_shared<Shell>(system.Kernel())->InstallAsService(system.ServiceManager());
97} 197}
98 198
99} // namespace Service::PM 199} // namespace Service::PM
diff --git a/src/core/hle/service/pm/pm.h b/src/core/hle/service/pm/pm.h
index cc8d3f215..852e7050c 100644
--- a/src/core/hle/service/pm/pm.h
+++ b/src/core/hle/service/pm/pm.h
@@ -4,8 +4,8 @@
4 4
5#pragma once 5#pragma once
6 6
7namespace Service::SM { 7namespace Core {
8class ServiceManager; 8class System;
9} 9}
10 10
11namespace Service::PM { 11namespace Service::PM {
@@ -16,6 +16,6 @@ enum class SystemBootMode {
16}; 16};
17 17
18/// Registers all PM services with the specified service manager. 18/// Registers all PM services with the specified service manager.
19void InstallInterfaces(SM::ServiceManager& service_manager); 19void InstallInterfaces(Core::System& system);
20 20
21} // namespace Service::PM 21} // namespace Service::PM
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index ec9d755b7..3a0f8c3f6 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -195,8 +195,7 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
195// Module interface 195// Module interface
196 196
197/// Initialize ServiceManager 197/// Initialize ServiceManager
198void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, 198void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) {
199 FileSys::VfsFilesystem& vfs) {
200 // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it 199 // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it
201 // here and pass it into the respective InstallInterfaces functions. 200 // here and pass it into the respective InstallInterfaces functions.
202 auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming()); 201 auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming());
@@ -206,8 +205,8 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
206 Account::InstallInterfaces(system); 205 Account::InstallInterfaces(system);
207 AM::InstallInterfaces(*sm, nv_flinger, system); 206 AM::InstallInterfaces(*sm, nv_flinger, system);
208 AOC::InstallInterfaces(*sm); 207 AOC::InstallInterfaces(*sm);
209 APM::InstallInterfaces(*sm); 208 APM::InstallInterfaces(system);
210 Audio::InstallInterfaces(*sm); 209 Audio::InstallInterfaces(*sm, system);
211 BCAT::InstallInterfaces(*sm); 210 BCAT::InstallInterfaces(*sm);
212 BPC::InstallInterfaces(*sm); 211 BPC::InstallInterfaces(*sm);
213 BtDrv::InstallInterfaces(*sm); 212 BtDrv::InstallInterfaces(*sm);
@@ -218,7 +217,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
218 EUPLD::InstallInterfaces(*sm); 217 EUPLD::InstallInterfaces(*sm);
219 Fatal::InstallInterfaces(*sm); 218 Fatal::InstallInterfaces(*sm);
220 FGM::InstallInterfaces(*sm); 219 FGM::InstallInterfaces(*sm);
221 FileSystem::InstallInterfaces(*sm, vfs); 220 FileSystem::InstallInterfaces(system);
222 Friend::InstallInterfaces(*sm); 221 Friend::InstallInterfaces(*sm);
223 Glue::InstallInterfaces(system); 222 Glue::InstallInterfaces(system);
224 GRC::InstallInterfaces(*sm); 223 GRC::InstallInterfaces(*sm);
@@ -237,19 +236,19 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
237 NIM::InstallInterfaces(*sm); 236 NIM::InstallInterfaces(*sm);
238 NPNS::InstallInterfaces(*sm); 237 NPNS::InstallInterfaces(*sm);
239 NS::InstallInterfaces(*sm); 238 NS::InstallInterfaces(*sm);
240 Nvidia::InstallInterfaces(*sm, *nv_flinger); 239 Nvidia::InstallInterfaces(*sm, *nv_flinger, system);
241 PCIe::InstallInterfaces(*sm); 240 PCIe::InstallInterfaces(*sm);
242 PCTL::InstallInterfaces(*sm); 241 PCTL::InstallInterfaces(*sm);
243 PCV::InstallInterfaces(*sm); 242 PCV::InstallInterfaces(*sm);
244 PlayReport::InstallInterfaces(*sm); 243 PlayReport::InstallInterfaces(*sm);
245 PM::InstallInterfaces(*sm); 244 PM::InstallInterfaces(system);
246 PSC::InstallInterfaces(*sm); 245 PSC::InstallInterfaces(*sm);
247 PSM::InstallInterfaces(*sm); 246 PSM::InstallInterfaces(*sm);
248 Set::InstallInterfaces(*sm); 247 Set::InstallInterfaces(*sm);
249 Sockets::InstallInterfaces(*sm); 248 Sockets::InstallInterfaces(*sm);
250 SPL::InstallInterfaces(*sm); 249 SPL::InstallInterfaces(*sm);
251 SSL::InstallInterfaces(*sm); 250 SSL::InstallInterfaces(*sm);
252 Time::InstallInterfaces(*sm); 251 Time::InstallInterfaces(system);
253 USB::InstallInterfaces(*sm); 252 USB::InstallInterfaces(*sm);
254 VI::InstallInterfaces(*sm, nv_flinger); 253 VI::InstallInterfaces(*sm, nv_flinger);
255 WLAN::InstallInterfaces(*sm); 254 WLAN::InstallInterfaces(*sm);
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index abbfe5524..c6c4bdae5 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -182,8 +182,7 @@ private:
182}; 182};
183 183
184/// Initialize ServiceManager 184/// Initialize ServiceManager
185void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, 185void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system);
186 FileSys::VfsFilesystem& vfs);
187 186
188/// Shutdown ServiceManager 187/// Shutdown ServiceManager
189void Shutdown(); 188void Shutdown();
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index 298d85011..b54214421 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -95,6 +95,14 @@ void SET::GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx) {
95 PushResponseLanguageCode(ctx, post4_0_0_max_entries); 95 PushResponseLanguageCode(ctx, post4_0_0_max_entries);
96} 96}
97 97
98void SET::GetQuestFlag(Kernel::HLERequestContext& ctx) {
99 LOG_DEBUG(Service_SET, "called");
100
101 IPC::ResponseBuilder rb{ctx, 3};
102 rb.Push(RESULT_SUCCESS);
103 rb.Push(static_cast<u32>(Settings::values.quest_flag));
104}
105
98void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) { 106void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) {
99 LOG_DEBUG(Service_SET, "called {}", Settings::values.language_index); 107 LOG_DEBUG(Service_SET, "called {}", Settings::values.language_index);
100 108
@@ -114,7 +122,7 @@ SET::SET() : ServiceFramework("set") {
114 {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"}, 122 {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"},
115 {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, 123 {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"},
116 {7, nullptr, "GetKeyCodeMap"}, 124 {7, nullptr, "GetKeyCodeMap"},
117 {8, nullptr, "GetQuestFlag"}, 125 {8, &SET::GetQuestFlag, "GetQuestFlag"},
118 {9, nullptr, "GetKeyCodeMap2"}, 126 {9, nullptr, "GetKeyCodeMap2"},
119 }; 127 };
120 // clang-format on 128 // clang-format on
diff --git a/src/core/hle/service/set/set.h b/src/core/hle/service/set/set.h
index 31f9cb296..b154e08aa 100644
--- a/src/core/hle/service/set/set.h
+++ b/src/core/hle/service/set/set.h
@@ -42,6 +42,7 @@ private:
42 void GetAvailableLanguageCodes2(Kernel::HLERequestContext& ctx); 42 void GetAvailableLanguageCodes2(Kernel::HLERequestContext& ctx);
43 void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx); 43 void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx);
44 void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx); 44 void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx);
45 void GetQuestFlag(Kernel::HLERequestContext& ctx);
45}; 46};
46 47
47} // namespace Service::Set 48} // namespace Service::Set
diff --git a/src/core/hle/service/time/interface.cpp b/src/core/hle/service/time/interface.cpp
index 8d122ae33..1030185e0 100644
--- a/src/core/hle/service/time/interface.cpp
+++ b/src/core/hle/service/time/interface.cpp
@@ -6,8 +6,9 @@
6 6
7namespace Service::Time { 7namespace Service::Time {
8 8
9Time::Time(std::shared_ptr<Module> time, const char* name) 9Time::Time(std::shared_ptr<Module> time, std::shared_ptr<SharedMemory> shared_memory,
10 : Module::Interface(std::move(time), name) { 10 const char* name)
11 : Module::Interface(std::move(time), std::move(shared_memory), name) {
11 // clang-format off 12 // clang-format off
12 static const FunctionInfo functions[] = { 13 static const FunctionInfo functions[] = {
13 {0, &Time::GetStandardUserSystemClock, "GetStandardUserSystemClock"}, 14 {0, &Time::GetStandardUserSystemClock, "GetStandardUserSystemClock"},
@@ -16,12 +17,12 @@ Time::Time(std::shared_ptr<Module> time, const char* name)
16 {3, &Time::GetTimeZoneService, "GetTimeZoneService"}, 17 {3, &Time::GetTimeZoneService, "GetTimeZoneService"},
17 {4, &Time::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"}, 18 {4, &Time::GetStandardLocalSystemClock, "GetStandardLocalSystemClock"},
18 {5, nullptr, "GetEphemeralNetworkSystemClock"}, 19 {5, nullptr, "GetEphemeralNetworkSystemClock"},
19 {20, nullptr, "GetSharedMemoryNativeHandle"}, 20 {20, &Time::GetSharedMemoryNativeHandle, "GetSharedMemoryNativeHandle"},
20 {30, nullptr, "GetStandardNetworkClockOperationEventReadableHandle"}, 21 {30, nullptr, "GetStandardNetworkClockOperationEventReadableHandle"},
21 {31, nullptr, "GetEphemeralNetworkClockOperationEventReadableHandle"}, 22 {31, nullptr, "GetEphemeralNetworkClockOperationEventReadableHandle"},
22 {50, nullptr, "SetStandardSteadyClockInternalOffset"}, 23 {50, nullptr, "SetStandardSteadyClockInternalOffset"},
23 {100, nullptr, "IsStandardUserSystemClockAutomaticCorrectionEnabled"}, 24 {100, &Time::IsStandardUserSystemClockAutomaticCorrectionEnabled, "IsStandardUserSystemClockAutomaticCorrectionEnabled"},
24 {101, nullptr, "SetStandardUserSystemClockAutomaticCorrectionEnabled"}, 25 {101, &Time::SetStandardUserSystemClockAutomaticCorrectionEnabled, "SetStandardUserSystemClockAutomaticCorrectionEnabled"},
25 {102, nullptr, "GetStandardUserSystemClockInitialYear"}, 26 {102, nullptr, "GetStandardUserSystemClockInitialYear"},
26 {200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"}, 27 {200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"},
27 {201, nullptr, "GetStandardUserSystemClockAutomaticCorrectionUpdatedTime"}, 28 {201, nullptr, "GetStandardUserSystemClockAutomaticCorrectionUpdatedTime"},
diff --git a/src/core/hle/service/time/interface.h b/src/core/hle/service/time/interface.h
index cd6b44dec..bdf0883e2 100644
--- a/src/core/hle/service/time/interface.h
+++ b/src/core/hle/service/time/interface.h
@@ -8,9 +8,12 @@
8 8
9namespace Service::Time { 9namespace Service::Time {
10 10
11class SharedMemory;
12
11class Time final : public Module::Interface { 13class Time final : public Module::Interface {
12public: 14public:
13 explicit Time(std::shared_ptr<Module> time, const char* name); 15 explicit Time(std::shared_ptr<Module> time, std::shared_ptr<SharedMemory> shared_memory,
16 const char* name);
14 ~Time() override; 17 ~Time() override;
15}; 18};
16 19
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index 346bad80d..ae6446204 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -13,6 +13,7 @@
13#include "core/hle/kernel/client_session.h" 13#include "core/hle/kernel/client_session.h"
14#include "core/hle/service/time/interface.h" 14#include "core/hle/service/time/interface.h"
15#include "core/hle/service/time/time.h" 15#include "core/hle/service/time/time.h"
16#include "core/hle/service/time/time_sharedmemory.h"
16#include "core/settings.h" 17#include "core/settings.h"
17 18
18namespace Service::Time { 19namespace Service::Time {
@@ -61,9 +62,18 @@ static u64 CalendarToPosix(const CalendarTime& calendar_time,
61 return static_cast<u64>(epoch_time); 62 return static_cast<u64>(epoch_time);
62} 63}
63 64
65enum class ClockContextType {
66 StandardSteady,
67 StandardUserSystem,
68 StandardNetworkSystem,
69 StandardLocalSystem,
70};
71
64class ISystemClock final : public ServiceFramework<ISystemClock> { 72class ISystemClock final : public ServiceFramework<ISystemClock> {
65public: 73public:
66 ISystemClock() : ServiceFramework("ISystemClock") { 74 ISystemClock(std::shared_ptr<Service::Time::SharedMemory> shared_memory,
75 ClockContextType clock_type)
76 : ServiceFramework("ISystemClock"), shared_memory(shared_memory), clock_type(clock_type) {
67 static const FunctionInfo functions[] = { 77 static const FunctionInfo functions[] = {
68 {0, &ISystemClock::GetCurrentTime, "GetCurrentTime"}, 78 {0, &ISystemClock::GetCurrentTime, "GetCurrentTime"},
69 {1, nullptr, "SetCurrentTime"}, 79 {1, nullptr, "SetCurrentTime"},
@@ -72,6 +82,8 @@ public:
72 82
73 }; 83 };
74 RegisterHandlers(functions); 84 RegisterHandlers(functions);
85
86 UpdateSharedMemoryContext(system_clock_context);
75 } 87 }
76 88
77private: 89private:
@@ -87,34 +99,63 @@ private:
87 void GetSystemClockContext(Kernel::HLERequestContext& ctx) { 99 void GetSystemClockContext(Kernel::HLERequestContext& ctx) {
88 LOG_WARNING(Service_Time, "(STUBBED) called"); 100 LOG_WARNING(Service_Time, "(STUBBED) called");
89 101
90 SystemClockContext system_clock_ontext{}; 102 // TODO(ogniK): This should be updated periodically however since we have it stubbed we'll
103 // only update when we get a new context
104 UpdateSharedMemoryContext(system_clock_context);
105
91 IPC::ResponseBuilder rb{ctx, (sizeof(SystemClockContext) / 4) + 2}; 106 IPC::ResponseBuilder rb{ctx, (sizeof(SystemClockContext) / 4) + 2};
92 rb.Push(RESULT_SUCCESS); 107 rb.Push(RESULT_SUCCESS);
93 rb.PushRaw(system_clock_ontext); 108 rb.PushRaw(system_clock_context);
94 } 109 }
110
111 void UpdateSharedMemoryContext(const SystemClockContext& clock_context) {
112 switch (clock_type) {
113 case ClockContextType::StandardLocalSystem:
114 shared_memory->SetStandardLocalSystemClockContext(clock_context);
115 break;
116 case ClockContextType::StandardNetworkSystem:
117 shared_memory->SetStandardNetworkSystemClockContext(clock_context);
118 break;
119 }
120 }
121
122 SystemClockContext system_clock_context{};
123 std::shared_ptr<Service::Time::SharedMemory> shared_memory;
124 ClockContextType clock_type;
95}; 125};
96 126
97class ISteadyClock final : public ServiceFramework<ISteadyClock> { 127class ISteadyClock final : public ServiceFramework<ISteadyClock> {
98public: 128public:
99 ISteadyClock() : ServiceFramework("ISteadyClock") { 129 ISteadyClock(std::shared_ptr<SharedMemory> shared_memory)
130 : ServiceFramework("ISteadyClock"), shared_memory(shared_memory) {
100 static const FunctionInfo functions[] = { 131 static const FunctionInfo functions[] = {
101 {0, &ISteadyClock::GetCurrentTimePoint, "GetCurrentTimePoint"}, 132 {0, &ISteadyClock::GetCurrentTimePoint, "GetCurrentTimePoint"},
102 }; 133 };
103 RegisterHandlers(functions); 134 RegisterHandlers(functions);
135
136 shared_memory->SetStandardSteadyClockTimepoint(GetCurrentTimePoint());
104 } 137 }
105 138
106private: 139private:
107 void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) { 140 void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) {
108 LOG_DEBUG(Service_Time, "called"); 141 LOG_DEBUG(Service_Time, "called");
109 142
110 const auto& core_timing = Core::System::GetInstance().CoreTiming(); 143 const auto time_point = GetCurrentTimePoint();
111 const auto ms = Core::Timing::CyclesToMs(core_timing.GetTicks()); 144 // TODO(ogniK): This should be updated periodically
112 const SteadyClockTimePoint steady_clock_time_point{static_cast<u64_le>(ms.count() / 1000), 145 shared_memory->SetStandardSteadyClockTimepoint(time_point);
113 {}}; 146
114 IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2}; 147 IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2};
115 rb.Push(RESULT_SUCCESS); 148 rb.Push(RESULT_SUCCESS);
116 rb.PushRaw(steady_clock_time_point); 149 rb.PushRaw(time_point);
117 } 150 }
151
152 SteadyClockTimePoint GetCurrentTimePoint() const {
153 const auto& core_timing = Core::System::GetInstance().CoreTiming();
154 const auto ms = Core::Timing::CyclesToMs(core_timing.GetTicks());
155 return {static_cast<u64_le>(ms.count() / 1000), {}};
156 }
157
158 std::shared_ptr<SharedMemory> shared_memory;
118}; 159};
119 160
120class ITimeZoneService final : public ServiceFramework<ITimeZoneService> { 161class ITimeZoneService final : public ServiceFramework<ITimeZoneService> {
@@ -233,7 +274,7 @@ void Module::Interface::GetStandardUserSystemClock(Kernel::HLERequestContext& ct
233 274
234 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 275 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
235 rb.Push(RESULT_SUCCESS); 276 rb.Push(RESULT_SUCCESS);
236 rb.PushIpcInterface<ISystemClock>(); 277 rb.PushIpcInterface<ISystemClock>(shared_memory, ClockContextType::StandardUserSystem);
237} 278}
238 279
239void Module::Interface::GetStandardNetworkSystemClock(Kernel::HLERequestContext& ctx) { 280void Module::Interface::GetStandardNetworkSystemClock(Kernel::HLERequestContext& ctx) {
@@ -241,7 +282,7 @@ void Module::Interface::GetStandardNetworkSystemClock(Kernel::HLERequestContext&
241 282
242 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 283 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
243 rb.Push(RESULT_SUCCESS); 284 rb.Push(RESULT_SUCCESS);
244 rb.PushIpcInterface<ISystemClock>(); 285 rb.PushIpcInterface<ISystemClock>(shared_memory, ClockContextType::StandardNetworkSystem);
245} 286}
246 287
247void Module::Interface::GetStandardSteadyClock(Kernel::HLERequestContext& ctx) { 288void Module::Interface::GetStandardSteadyClock(Kernel::HLERequestContext& ctx) {
@@ -249,7 +290,7 @@ void Module::Interface::GetStandardSteadyClock(Kernel::HLERequestContext& ctx) {
249 290
250 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 291 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
251 rb.Push(RESULT_SUCCESS); 292 rb.Push(RESULT_SUCCESS);
252 rb.PushIpcInterface<ISteadyClock>(); 293 rb.PushIpcInterface<ISteadyClock>(shared_memory);
253} 294}
254 295
255void Module::Interface::GetTimeZoneService(Kernel::HLERequestContext& ctx) { 296void Module::Interface::GetTimeZoneService(Kernel::HLERequestContext& ctx) {
@@ -265,7 +306,7 @@ void Module::Interface::GetStandardLocalSystemClock(Kernel::HLERequestContext& c
265 306
266 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 307 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
267 rb.Push(RESULT_SUCCESS); 308 rb.Push(RESULT_SUCCESS);
268 rb.PushIpcInterface<ISystemClock>(); 309 rb.PushIpcInterface<ISystemClock>(shared_memory, ClockContextType::StandardLocalSystem);
269} 310}
270 311
271void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) { 312void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
@@ -333,16 +374,52 @@ void Module::Interface::CalculateStandardUserSystemClockDifferenceByUser(
333 rb.PushRaw<u64>(difference); 374 rb.PushRaw<u64>(difference);
334} 375}
335 376
336Module::Interface::Interface(std::shared_ptr<Module> time, const char* name) 377void Module::Interface::GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx) {
337 : ServiceFramework(name), time(std::move(time)) {} 378 LOG_DEBUG(Service_Time, "called");
379 IPC::ResponseBuilder rb{ctx, 2, 1};
380 rb.Push(RESULT_SUCCESS);
381 rb.PushCopyObjects(shared_memory->GetSharedMemoryHolder());
382}
383
384void Module::Interface::IsStandardUserSystemClockAutomaticCorrectionEnabled(
385 Kernel::HLERequestContext& ctx) {
386 // ogniK(TODO): When clock contexts are implemented, the value should be read from the context
387 // instead of our shared memory holder
388 LOG_DEBUG(Service_Time, "called");
389
390 IPC::ResponseBuilder rb{ctx, 3};
391 rb.Push(RESULT_SUCCESS);
392 rb.Push<u8>(shared_memory->GetStandardUserSystemClockAutomaticCorrectionEnabled());
393}
394
395void Module::Interface::SetStandardUserSystemClockAutomaticCorrectionEnabled(
396 Kernel::HLERequestContext& ctx) {
397 IPC::RequestParser rp{ctx};
398 const auto enabled = rp.Pop<u8>();
399
400 LOG_WARNING(Service_Time, "(PARTIAL IMPLEMENTATION) called");
401
402 // TODO(ogniK): Update clock contexts and correct timespans
403
404 shared_memory->SetStandardUserSystemClockAutomaticCorrectionEnabled(enabled > 0);
405 IPC::ResponseBuilder rb{ctx, 2};
406 rb.Push(RESULT_SUCCESS);
407}
408
409Module::Interface::Interface(std::shared_ptr<Module> time,
410 std::shared_ptr<SharedMemory> shared_memory, const char* name)
411 : ServiceFramework(name), time(std::move(time)), shared_memory(std::move(shared_memory)) {}
338 412
339Module::Interface::~Interface() = default; 413Module::Interface::~Interface() = default;
340 414
341void InstallInterfaces(SM::ServiceManager& service_manager) { 415void InstallInterfaces(Core::System& system) {
342 auto time = std::make_shared<Module>(); 416 auto time = std::make_shared<Module>();
343 std::make_shared<Time>(time, "time:a")->InstallAsService(service_manager); 417 auto shared_mem = std::make_shared<SharedMemory>(system);
344 std::make_shared<Time>(time, "time:s")->InstallAsService(service_manager); 418
345 std::make_shared<Time>(time, "time:u")->InstallAsService(service_manager); 419 std::make_shared<Time>(time, shared_mem, "time:a")->InstallAsService(system.ServiceManager());
420 std::make_shared<Time>(time, shared_mem, "time:s")->InstallAsService(system.ServiceManager());
421 std::make_shared<Time>(std::move(time), shared_mem, "time:u")
422 ->InstallAsService(system.ServiceManager());
346} 423}
347 424
348} // namespace Service::Time 425} // namespace Service::Time
diff --git a/src/core/hle/service/time/time.h b/src/core/hle/service/time/time.h
index f11affe95..e0708f856 100644
--- a/src/core/hle/service/time/time.h
+++ b/src/core/hle/service/time/time.h
@@ -10,6 +10,8 @@
10 10
11namespace Service::Time { 11namespace Service::Time {
12 12
13class SharedMemory;
14
13struct LocationName { 15struct LocationName {
14 std::array<u8, 0x24> name; 16 std::array<u8, 0x24> name;
15}; 17};
@@ -77,7 +79,8 @@ class Module final {
77public: 79public:
78 class Interface : public ServiceFramework<Interface> { 80 class Interface : public ServiceFramework<Interface> {
79 public: 81 public:
80 explicit Interface(std::shared_ptr<Module> time, const char* name); 82 explicit Interface(std::shared_ptr<Module> time,
83 std::shared_ptr<SharedMemory> shared_memory, const char* name);
81 ~Interface() override; 84 ~Interface() override;
82 85
83 void GetStandardUserSystemClock(Kernel::HLERequestContext& ctx); 86 void GetStandardUserSystemClock(Kernel::HLERequestContext& ctx);
@@ -87,13 +90,17 @@ public:
87 void GetStandardLocalSystemClock(Kernel::HLERequestContext& ctx); 90 void GetStandardLocalSystemClock(Kernel::HLERequestContext& ctx);
88 void GetClockSnapshot(Kernel::HLERequestContext& ctx); 91 void GetClockSnapshot(Kernel::HLERequestContext& ctx);
89 void CalculateStandardUserSystemClockDifferenceByUser(Kernel::HLERequestContext& ctx); 92 void CalculateStandardUserSystemClockDifferenceByUser(Kernel::HLERequestContext& ctx);
93 void GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx);
94 void IsStandardUserSystemClockAutomaticCorrectionEnabled(Kernel::HLERequestContext& ctx);
95 void SetStandardUserSystemClockAutomaticCorrectionEnabled(Kernel::HLERequestContext& ctx);
90 96
91 protected: 97 protected:
92 std::shared_ptr<Module> time; 98 std::shared_ptr<Module> time;
99 std::shared_ptr<SharedMemory> shared_memory;
93 }; 100 };
94}; 101};
95 102
96/// Registers all Time services with the specified service manager. 103/// Registers all Time services with the specified service manager.
97void InstallInterfaces(SM::ServiceManager& service_manager); 104void InstallInterfaces(Core::System& system);
98 105
99} // namespace Service::Time 106} // namespace Service::Time
diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp
new file mode 100644
index 000000000..bfc81b83c
--- /dev/null
+++ b/src/core/hle/service/time/time_sharedmemory.cpp
@@ -0,0 +1,68 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/core.h"
6#include "core/hle/service/time/time_sharedmemory.h"
7
8namespace Service::Time {
9const std::size_t SHARED_MEMORY_SIZE = 0x1000;
10
11SharedMemory::SharedMemory(Core::System& system) : system(system) {
12 shared_memory_holder = Kernel::SharedMemory::Create(
13 system.Kernel(), nullptr, SHARED_MEMORY_SIZE, Kernel::MemoryPermission::ReadWrite,
14 Kernel::MemoryPermission::Read, 0, Kernel::MemoryRegion::BASE, "Time:SharedMemory");
15
16 // Seems static from 1.0.0 -> 8.1.0. Specific games seem to check this value and crash
17 // if it's set to anything else
18 shared_memory_format.format_version = 14;
19 std::memcpy(shared_memory_holder->GetPointer(), &shared_memory_format, sizeof(Format));
20}
21
22SharedMemory::~SharedMemory() = default;
23
24Kernel::SharedPtr<Kernel::SharedMemory> SharedMemory::GetSharedMemoryHolder() const {
25 return shared_memory_holder;
26}
27
28void SharedMemory::SetStandardSteadyClockTimepoint(const SteadyClockTimePoint& timepoint) {
29 shared_memory_format.standard_steady_clock_timepoint.StoreData(
30 shared_memory_holder->GetPointer(), timepoint);
31}
32
33void SharedMemory::SetStandardLocalSystemClockContext(const SystemClockContext& context) {
34 shared_memory_format.standard_local_system_clock_context.StoreData(
35 shared_memory_holder->GetPointer(), context);
36}
37
38void SharedMemory::SetStandardNetworkSystemClockContext(const SystemClockContext& context) {
39 shared_memory_format.standard_network_system_clock_context.StoreData(
40 shared_memory_holder->GetPointer(), context);
41}
42
43void SharedMemory::SetStandardUserSystemClockAutomaticCorrectionEnabled(bool enabled) {
44 shared_memory_format.standard_user_system_clock_automatic_correction.StoreData(
45 shared_memory_holder->GetPointer(), enabled);
46}
47
48SteadyClockTimePoint SharedMemory::GetStandardSteadyClockTimepoint() {
49 return shared_memory_format.standard_steady_clock_timepoint.ReadData(
50 shared_memory_holder->GetPointer());
51}
52
53SystemClockContext SharedMemory::GetStandardLocalSystemClockContext() {
54 return shared_memory_format.standard_local_system_clock_context.ReadData(
55 shared_memory_holder->GetPointer());
56}
57
58SystemClockContext SharedMemory::GetStandardNetworkSystemClockContext() {
59 return shared_memory_format.standard_network_system_clock_context.ReadData(
60 shared_memory_holder->GetPointer());
61}
62
63bool SharedMemory::GetStandardUserSystemClockAutomaticCorrectionEnabled() {
64 return shared_memory_format.standard_user_system_clock_automatic_correction.ReadData(
65 shared_memory_holder->GetPointer());
66}
67
68} // namespace Service::Time
diff --git a/src/core/hle/service/time/time_sharedmemory.h b/src/core/hle/service/time/time_sharedmemory.h
new file mode 100644
index 000000000..cb8253541
--- /dev/null
+++ b/src/core/hle/service/time/time_sharedmemory.h
@@ -0,0 +1,74 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "core/hle/kernel/shared_memory.h"
9#include "core/hle/service/time/time.h"
10
11namespace Service::Time {
12class SharedMemory {
13public:
14 explicit SharedMemory(Core::System& system);
15 ~SharedMemory();
16
17 // Return the shared memory handle
18 Kernel::SharedPtr<Kernel::SharedMemory> GetSharedMemoryHolder() const;
19
20 // Set memory barriers in shared memory and update them
21 void SetStandardSteadyClockTimepoint(const SteadyClockTimePoint& timepoint);
22 void SetStandardLocalSystemClockContext(const SystemClockContext& context);
23 void SetStandardNetworkSystemClockContext(const SystemClockContext& context);
24 void SetStandardUserSystemClockAutomaticCorrectionEnabled(bool enabled);
25
26 // Pull from memory barriers in the shared memory
27 SteadyClockTimePoint GetStandardSteadyClockTimepoint();
28 SystemClockContext GetStandardLocalSystemClockContext();
29 SystemClockContext GetStandardNetworkSystemClockContext();
30 bool GetStandardUserSystemClockAutomaticCorrectionEnabled();
31
32 // TODO(ogniK): We have to properly simulate memory barriers, how are we going to do this?
33 template <typename T, std::size_t Offset>
34 struct MemoryBarrier {
35 static_assert(std::is_trivially_constructible_v<T>, "T must be trivially constructable");
36 u32_le read_attempt{};
37 std::array<T, 2> data{};
38
39 // These are not actually memory barriers at the moment as we don't have multicore and all
40 // HLE is mutexed. This will need to properly be implemented when we start updating the time
41 // points on threads. As of right now, we'll be updated both values synchronously and just
42 // incrementing the read_attempt to indicate that we waited.
43 void StoreData(u8* shared_memory, T data_to_store) {
44 std::memcpy(this, shared_memory + Offset, sizeof(*this));
45 read_attempt++;
46 data[read_attempt & 1] = data_to_store;
47 std::memcpy(shared_memory + Offset, this, sizeof(*this));
48 }
49
50 // For reading we're just going to read the last stored value. If there was no value stored
51 // it will just end up reading an empty value as intended.
52 T ReadData(u8* shared_memory) {
53 std::memcpy(this, shared_memory + Offset, sizeof(*this));
54 return data[(read_attempt - 1) & 1];
55 }
56 };
57
58 // Shared memory format
59 struct Format {
60 MemoryBarrier<SteadyClockTimePoint, 0x0> standard_steady_clock_timepoint;
61 MemoryBarrier<SystemClockContext, 0x38> standard_local_system_clock_context;
62 MemoryBarrier<SystemClockContext, 0x80> standard_network_system_clock_context;
63 MemoryBarrier<bool, 0xc8> standard_user_system_clock_automatic_correction;
64 u32_le format_version;
65 };
66 static_assert(sizeof(Format) == 0xd8, "Format is an invalid size");
67
68private:
69 Kernel::SharedPtr<Kernel::SharedMemory> shared_memory_holder{};
70 Core::System& system;
71 Format shared_memory_format{};
72};
73
74} // namespace Service::Time
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index f1fa6ccd1..199b30635 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -21,6 +21,7 @@
21#include "core/hle/kernel/readable_event.h" 21#include "core/hle/kernel/readable_event.h"
22#include "core/hle/kernel/thread.h" 22#include "core/hle/kernel/thread.h"
23#include "core/hle/kernel/writable_event.h" 23#include "core/hle/kernel/writable_event.h"
24#include "core/hle/service/nvdrv/nvdata.h"
24#include "core/hle/service/nvdrv/nvdrv.h" 25#include "core/hle/service/nvdrv/nvdrv.h"
25#include "core/hle/service/nvflinger/buffer_queue.h" 26#include "core/hle/service/nvflinger/buffer_queue.h"
26#include "core/hle/service/nvflinger/nvflinger.h" 27#include "core/hle/service/nvflinger/nvflinger.h"
@@ -328,32 +329,22 @@ public:
328 Data data; 329 Data data;
329}; 330};
330 331
331struct BufferProducerFence {
332 u32 is_valid;
333 std::array<Nvidia::IoctlFence, 4> fences;
334};
335static_assert(sizeof(BufferProducerFence) == 36, "BufferProducerFence has wrong size");
336
337class IGBPDequeueBufferResponseParcel : public Parcel { 332class IGBPDequeueBufferResponseParcel : public Parcel {
338public: 333public:
339 explicit IGBPDequeueBufferResponseParcel(u32 slot) : slot(slot) {} 334 explicit IGBPDequeueBufferResponseParcel(u32 slot, Service::Nvidia::MultiFence& multi_fence)
335 : slot(slot), multi_fence(multi_fence) {}
340 ~IGBPDequeueBufferResponseParcel() override = default; 336 ~IGBPDequeueBufferResponseParcel() override = default;
341 337
342protected: 338protected:
343 void SerializeData() override { 339 void SerializeData() override {
344 // TODO(Subv): Find out how this Fence is used.
345 BufferProducerFence fence = {};
346 fence.is_valid = 1;
347 for (auto& fence_ : fence.fences)
348 fence_.id = -1;
349
350 Write(slot); 340 Write(slot);
351 Write<u32_le>(1); 341 Write<u32_le>(1);
352 WriteObject(fence); 342 WriteObject(multi_fence);
353 Write<u32_le>(0); 343 Write<u32_le>(0);
354 } 344 }
355 345
356 u32_le slot; 346 u32_le slot;
347 Service::Nvidia::MultiFence multi_fence;
357}; 348};
358 349
359class IGBPRequestBufferRequestParcel : public Parcel { 350class IGBPRequestBufferRequestParcel : public Parcel {
@@ -400,12 +391,6 @@ public:
400 data = Read<Data>(); 391 data = Read<Data>();
401 } 392 }
402 393
403 struct Fence {
404 u32_le id;
405 u32_le value;
406 };
407 static_assert(sizeof(Fence) == 8, "Fence has wrong size");
408
409 struct Data { 394 struct Data {
410 u32_le slot; 395 u32_le slot;
411 INSERT_PADDING_WORDS(3); 396 INSERT_PADDING_WORDS(3);
@@ -418,15 +403,15 @@ public:
418 s32_le scaling_mode; 403 s32_le scaling_mode;
419 NVFlinger::BufferQueue::BufferTransformFlags transform; 404 NVFlinger::BufferQueue::BufferTransformFlags transform;
420 u32_le sticky_transform; 405 u32_le sticky_transform;
421 INSERT_PADDING_WORDS(2); 406 INSERT_PADDING_WORDS(1);
422 u32_le fence_is_valid; 407 u32_le swap_interval;
423 std::array<Fence, 2> fences; 408 Service::Nvidia::MultiFence multi_fence;
424 409
425 Common::Rectangle<int> GetCropRect() const { 410 Common::Rectangle<int> GetCropRect() const {
426 return {crop_left, crop_top, crop_right, crop_bottom}; 411 return {crop_left, crop_top, crop_right, crop_bottom};
427 } 412 }
428 }; 413 };
429 static_assert(sizeof(Data) == 80, "ParcelData has wrong size"); 414 static_assert(sizeof(Data) == 96, "ParcelData has wrong size");
430 415
431 Data data; 416 Data data;
432}; 417};
@@ -547,11 +532,11 @@ private:
547 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; 532 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
548 const u32 width{request.data.width}; 533 const u32 width{request.data.width};
549 const u32 height{request.data.height}; 534 const u32 height{request.data.height};
550 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); 535 auto result = buffer_queue.DequeueBuffer(width, height);
551 536
552 if (slot) { 537 if (result) {
553 // Buffer is available 538 // Buffer is available
554 IGBPDequeueBufferResponseParcel response{*slot}; 539 IGBPDequeueBufferResponseParcel response{result->first, *result->second};
555 ctx.WriteBuffer(response.Serialize()); 540 ctx.WriteBuffer(response.Serialize());
556 } else { 541 } else {
557 // Wait the current thread until a buffer becomes available 542 // Wait the current thread until a buffer becomes available
@@ -561,10 +546,10 @@ private:
561 Kernel::ThreadWakeupReason reason) { 546 Kernel::ThreadWakeupReason reason) {
562 // Repeat TransactParcel DequeueBuffer when a buffer is available 547 // Repeat TransactParcel DequeueBuffer when a buffer is available
563 auto& buffer_queue = nv_flinger->FindBufferQueue(id); 548 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
564 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); 549 auto result = buffer_queue.DequeueBuffer(width, height);
565 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); 550 ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer.");
566 551
567 IGBPDequeueBufferResponseParcel response{*slot}; 552 IGBPDequeueBufferResponseParcel response{result->first, *result->second};
568 ctx.WriteBuffer(response.Serialize()); 553 ctx.WriteBuffer(response.Serialize());
569 IPC::ResponseBuilder rb{ctx, 2}; 554 IPC::ResponseBuilder rb{ctx, 2};
570 rb.Push(RESULT_SUCCESS); 555 rb.Push(RESULT_SUCCESS);
@@ -582,7 +567,8 @@ private:
582 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; 567 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
583 568
584 buffer_queue.QueueBuffer(request.data.slot, request.data.transform, 569 buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
585 request.data.GetCropRect()); 570 request.data.GetCropRect(), request.data.swap_interval,
571 request.data.multi_fence);
586 572
587 IGBPQueueBufferResponseParcel response{1280, 720}; 573 IGBPQueueBufferResponseParcel response{1280, 720};
588 ctx.WriteBuffer(response.Serialize()); 574 ctx.WriteBuffer(response.Serialize());
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 6d4b02375..f1795fdd6 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -295,7 +295,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
295 } 295 }
296 } 296 }
297 297
298 std::vector<u8> program_image(total_image_size); 298 Kernel::PhysicalMemory program_image(total_image_size);
299 std::size_t current_image_position = 0; 299 std::size_t current_image_position = 0;
300 300
301 Kernel::CodeSet codeset; 301 Kernel::CodeSet codeset;
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index 70051c13a..474b55cb1 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -69,7 +69,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
69 69
70 const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); 70 const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
71 Kernel::CodeSet codeset; 71 Kernel::CodeSet codeset;
72 std::vector<u8> program_image; 72 Kernel::PhysicalMemory program_image;
73 73
74 const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment, 74 const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment,
75 const std::vector<u8>& data, u32 offset) { 75 const std::vector<u8>& data, u32 offset) {
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 6a0ca389b..3a5361fdd 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -143,7 +143,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
143 } 143 }
144 144
145 // Build program image 145 // Build program image
146 std::vector<u8> program_image(PageAlignSize(nro_header.file_size)); 146 Kernel::PhysicalMemory program_image(PageAlignSize(nro_header.file_size));
147 std::memcpy(program_image.data(), data.data(), program_image.size()); 147 std::memcpy(program_image.data(), data.data(), program_image.size());
148 if (program_image.size() != PageAlignSize(nro_header.file_size)) { 148 if (program_image.size() != PageAlignSize(nro_header.file_size)) {
149 return {}; 149 return {};
@@ -258,6 +258,15 @@ ResultStatus AppLoader_NRO::ReadTitle(std::string& title) {
258 return ResultStatus::Success; 258 return ResultStatus::Success;
259} 259}
260 260
261ResultStatus AppLoader_NRO::ReadControlData(FileSys::NACP& control) {
262 if (nacp == nullptr) {
263 return ResultStatus::ErrorNoControl;
264 }
265
266 control = *nacp;
267 return ResultStatus::Success;
268}
269
261bool AppLoader_NRO::IsRomFSUpdatable() const { 270bool AppLoader_NRO::IsRomFSUpdatable() const {
262 return false; 271 return false;
263} 272}
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 1ffdae805..71811bc29 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -43,6 +43,7 @@ public:
43 ResultStatus ReadProgramId(u64& out_program_id) override; 43 ResultStatus ReadProgramId(u64& out_program_id) override;
44 ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; 44 ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
45 ResultStatus ReadTitle(std::string& title) override; 45 ResultStatus ReadTitle(std::string& title) override;
46 ResultStatus ReadControlData(FileSys::NACP& control) override;
46 bool IsRomFSUpdatable() const override; 47 bool IsRomFSUpdatable() const override;
47 48
48private: 49private:
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 29311404a..70c90109f 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -89,7 +89,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
89 89
90 // Build program image 90 // Build program image
91 Kernel::CodeSet codeset; 91 Kernel::CodeSet codeset;
92 std::vector<u8> program_image; 92 Kernel::PhysicalMemory program_image;
93 for (std::size_t i = 0; i < nso_header.segments.size(); ++i) { 93 for (std::size_t i = 0; i < nso_header.segments.size(); ++i) {
94 std::vector<u8> data = 94 std::vector<u8> data =
95 file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset); 95 file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset);
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index f18f6226b..8555691c0 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -16,11 +16,9 @@
16#include "core/core.h" 16#include "core/core.h"
17#include "core/hle/kernel/process.h" 17#include "core/hle/kernel/process.h"
18#include "core/hle/kernel/vm_manager.h" 18#include "core/hle/kernel/vm_manager.h"
19#include "core/hle/lock.h"
20#include "core/memory.h" 19#include "core/memory.h"
21#include "core/memory_setup.h" 20#include "core/memory_setup.h"
22#include "video_core/gpu.h" 21#include "video_core/gpu.h"
23#include "video_core/renderer_base.h"
24 22
25namespace Memory { 23namespace Memory {
26 24
diff --git a/src/core/memory.h b/src/core/memory.h
index 04e2c5f1d..09008e1dd 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -8,10 +8,6 @@
8#include <string> 8#include <string>
9#include "common/common_types.h" 9#include "common/common_types.h"
10 10
11namespace Common {
12struct PageTable;
13}
14
15namespace Kernel { 11namespace Kernel {
16class Process; 12class Process;
17} 13}
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp
index 774022569..cfe0771e2 100644
--- a/src/core/reporter.cpp
+++ b/src/core/reporter.cpp
@@ -2,8 +2,13 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <ctime>
5#include <fstream> 6#include <fstream>
7
8#include <fmt/chrono.h>
9#include <fmt/format.h>
6#include <json.hpp> 10#include <json.hpp>
11
7#include "common/file_util.h" 12#include "common/file_util.h"
8#include "common/hex_util.h" 13#include "common/hex_util.h"
9#include "common/scm_rev.h" 14#include "common/scm_rev.h"
@@ -14,7 +19,6 @@
14#include "core/hle/result.h" 19#include "core/hle/result.h"
15#include "core/reporter.h" 20#include "core/reporter.h"
16#include "core/settings.h" 21#include "core/settings.h"
17#include "fmt/time.h"
18 22
19namespace { 23namespace {
20 24
@@ -30,9 +34,11 @@ std::string GetTimestamp() {
30 34
31using namespace nlohmann; 35using namespace nlohmann;
32 36
33void SaveToFile(const json& json, const std::string& filename) { 37void SaveToFile(json json, const std::string& filename) {
34 if (!FileUtil::CreateFullPath(filename)) 38 if (!FileUtil::CreateFullPath(filename)) {
35 LOG_ERROR(Core, "Failed to create path for '{}' to save report!", filename); 39 LOG_ERROR(Core, "Failed to create path for '{}' to save report!", filename);
40 return;
41 }
36 42
37 std::ofstream file( 43 std::ofstream file(
38 FileUtil::SanitizePath(filename, FileUtil::DirectorySeparator::PlatformDefault)); 44 FileUtil::SanitizePath(filename, FileUtil::DirectorySeparator::PlatformDefault));
@@ -61,8 +67,11 @@ json GetReportCommonData(u64 title_id, ResultCode result, const std::string& tim
61 {"result_description", fmt::format("{:08X}", result.description.Value())}, 67 {"result_description", fmt::format("{:08X}", result.description.Value())},
62 {"timestamp", timestamp}, 68 {"timestamp", timestamp},
63 }; 69 };
64 if (user_id.has_value()) 70
71 if (user_id.has_value()) {
65 out["user_id"] = fmt::format("{:016X}{:016X}", (*user_id)[1], (*user_id)[0]); 72 out["user_id"] = fmt::format("{:016X}{:016X}", (*user_id)[1], (*user_id)[0]);
73 }
74
66 return out; 75 return out;
67} 76}
68 77
@@ -171,14 +180,14 @@ json GetHLERequestContextData(Kernel::HLERequestContext& ctx) {
171 out["buffer_descriptor_c"] = GetHLEBufferDescriptorData<false>(ctx.BufferDescriptorC()); 180 out["buffer_descriptor_c"] = GetHLEBufferDescriptorData<false>(ctx.BufferDescriptorC());
172 out["buffer_descriptor_x"] = GetHLEBufferDescriptorData<true>(ctx.BufferDescriptorX()); 181 out["buffer_descriptor_x"] = GetHLEBufferDescriptorData<true>(ctx.BufferDescriptorX());
173 182
174 return std::move(out); 183 return out;
175} 184}
176 185
177} // Anonymous namespace 186} // Anonymous namespace
178 187
179namespace Core { 188namespace Core {
180 189
181Reporter::Reporter(Core::System& system) : system(system) {} 190Reporter::Reporter(System& system) : system(system) {}
182 191
183Reporter::~Reporter() = default; 192Reporter::~Reporter() = default;
184 193
@@ -187,8 +196,9 @@ void Reporter::SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u
187 const std::array<u64, 31>& registers, 196 const std::array<u64, 31>& registers,
188 const std::array<u64, 32>& backtrace, u32 backtrace_size, 197 const std::array<u64, 32>& backtrace, u32 backtrace_size,
189 const std::string& arch, u32 unk10) const { 198 const std::string& arch, u32 unk10) const {
190 if (!IsReportingEnabled()) 199 if (!IsReportingEnabled()) {
191 return; 200 return;
201 }
192 202
193 const auto timestamp = GetTimestamp(); 203 const auto timestamp = GetTimestamp();
194 json out; 204 json out;
@@ -212,8 +222,9 @@ void Reporter::SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u
212 222
213void Reporter::SaveSvcBreakReport(u32 type, bool signal_debugger, u64 info1, u64 info2, 223void Reporter::SaveSvcBreakReport(u32 type, bool signal_debugger, u64 info1, u64 info2,
214 std::optional<std::vector<u8>> resolved_buffer) const { 224 std::optional<std::vector<u8>> resolved_buffer) const {
215 if (!IsReportingEnabled()) 225 if (!IsReportingEnabled()) {
216 return; 226 return;
227 }
217 228
218 const auto timestamp = GetTimestamp(); 229 const auto timestamp = GetTimestamp();
219 const auto title_id = system.CurrentProcess()->GetTitleID(); 230 const auto title_id = system.CurrentProcess()->GetTitleID();
@@ -238,8 +249,9 @@ void Reporter::SaveSvcBreakReport(u32 type, bool signal_debugger, u64 info1, u64
238void Reporter::SaveUnimplementedFunctionReport(Kernel::HLERequestContext& ctx, u32 command_id, 249void Reporter::SaveUnimplementedFunctionReport(Kernel::HLERequestContext& ctx, u32 command_id,
239 const std::string& name, 250 const std::string& name,
240 const std::string& service_name) const { 251 const std::string& service_name) const {
241 if (!IsReportingEnabled()) 252 if (!IsReportingEnabled()) {
242 return; 253 return;
254 }
243 255
244 const auto timestamp = GetTimestamp(); 256 const auto timestamp = GetTimestamp();
245 const auto title_id = system.CurrentProcess()->GetTitleID(); 257 const auto title_id = system.CurrentProcess()->GetTitleID();
@@ -259,8 +271,9 @@ void Reporter::SaveUnimplementedAppletReport(
259 u32 applet_id, u32 common_args_version, u32 library_version, u32 theme_color, 271 u32 applet_id, u32 common_args_version, u32 library_version, u32 theme_color,
260 bool startup_sound, u64 system_tick, std::vector<std::vector<u8>> normal_channel, 272 bool startup_sound, u64 system_tick, std::vector<std::vector<u8>> normal_channel,
261 std::vector<std::vector<u8>> interactive_channel) const { 273 std::vector<std::vector<u8>> interactive_channel) const {
262 if (!IsReportingEnabled()) 274 if (!IsReportingEnabled()) {
263 return; 275 return;
276 }
264 277
265 const auto timestamp = GetTimestamp(); 278 const auto timestamp = GetTimestamp();
266 const auto title_id = system.CurrentProcess()->GetTitleID(); 279 const auto title_id = system.CurrentProcess()->GetTitleID();
@@ -293,8 +306,9 @@ void Reporter::SaveUnimplementedAppletReport(
293 306
294void Reporter::SavePlayReport(u64 title_id, u64 process_id, std::vector<std::vector<u8>> data, 307void Reporter::SavePlayReport(u64 title_id, u64 process_id, std::vector<std::vector<u8>> data,
295 std::optional<u128> user_id) const { 308 std::optional<u128> user_id) const {
296 if (!IsReportingEnabled()) 309 if (!IsReportingEnabled()) {
297 return; 310 return;
311 }
298 312
299 const auto timestamp = GetTimestamp(); 313 const auto timestamp = GetTimestamp();
300 json out; 314 json out;
@@ -316,8 +330,9 @@ void Reporter::SavePlayReport(u64 title_id, u64 process_id, std::vector<std::vec
316void Reporter::SaveErrorReport(u64 title_id, ResultCode result, 330void Reporter::SaveErrorReport(u64 title_id, ResultCode result,
317 std::optional<std::string> custom_text_main, 331 std::optional<std::string> custom_text_main,
318 std::optional<std::string> custom_text_detail) const { 332 std::optional<std::string> custom_text_detail) const {
319 if (!IsReportingEnabled()) 333 if (!IsReportingEnabled()) {
320 return; 334 return;
335 }
321 336
322 const auto timestamp = GetTimestamp(); 337 const auto timestamp = GetTimestamp();
323 json out; 338 json out;
@@ -335,12 +350,31 @@ void Reporter::SaveErrorReport(u64 title_id, ResultCode result,
335 SaveToFile(std::move(out), GetPath("error_report", title_id, timestamp)); 350 SaveToFile(std::move(out), GetPath("error_report", title_id, timestamp));
336} 351}
337 352
338void Reporter::SaveUserReport() const { 353void Reporter::SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode,
354 std::string log_message) const {
339 if (!IsReportingEnabled()) 355 if (!IsReportingEnabled())
340 return; 356 return;
341 357
342 const auto timestamp = GetTimestamp(); 358 const auto timestamp = GetTimestamp();
343 const auto title_id = system.CurrentProcess()->GetTitleID(); 359 const auto title_id = system.CurrentProcess()->GetTitleID();
360 json out;
361
362 out["yuzu_version"] = GetYuzuVersionData();
363 out["report_common"] = GetReportCommonData(title_id, RESULT_SUCCESS, timestamp);
364
365 out["log_mode"] = fmt::format("{:08X}", static_cast<u32>(log_mode));
366 out["log_message"] = std::move(log_message);
367
368 SaveToFile(std::move(out), GetPath("filesystem_access_report", title_id, timestamp));
369}
370
371void Reporter::SaveUserReport() const {
372 if (!IsReportingEnabled()) {
373 return;
374 }
375
376 const auto timestamp = GetTimestamp();
377 const auto title_id = system.CurrentProcess()->GetTitleID();
344 378
345 SaveToFile(GetFullDataAuto(timestamp, title_id, system), 379 SaveToFile(GetFullDataAuto(timestamp, title_id, system),
346 GetPath("user_report", title_id, timestamp)); 380 GetPath("user_report", title_id, timestamp));
diff --git a/src/core/reporter.h b/src/core/reporter.h
index 3de19c0f7..44256de50 100644
--- a/src/core/reporter.h
+++ b/src/core/reporter.h
@@ -4,7 +4,9 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <optional> 8#include <optional>
9#include <string>
8#include <vector> 10#include <vector>
9#include "common/common_types.h" 11#include "common/common_types.h"
10 12
@@ -14,11 +16,17 @@ namespace Kernel {
14class HLERequestContext; 16class HLERequestContext;
15} // namespace Kernel 17} // namespace Kernel
16 18
19namespace Service::FileSystem {
20enum class LogMode : u32;
21}
22
17namespace Core { 23namespace Core {
18 24
25class System;
26
19class Reporter { 27class Reporter {
20public: 28public:
21 explicit Reporter(Core::System& system); 29 explicit Reporter(System& system);
22 ~Reporter(); 30 ~Reporter();
23 31
24 void SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u64 entry_point, u64 sp, 32 void SaveCrashReport(u64 title_id, ResultCode result, u64 set_flags, u64 entry_point, u64 sp,
@@ -45,12 +53,15 @@ public:
45 std::optional<std::string> custom_text_main = {}, 53 std::optional<std::string> custom_text_main = {},
46 std::optional<std::string> custom_text_detail = {}) const; 54 std::optional<std::string> custom_text_detail = {}) const;
47 55
56 void SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode,
57 std::string log_message) const;
58
48 void SaveUserReport() const; 59 void SaveUserReport() const;
49 60
50private: 61private:
51 bool IsReportingEnabled() const; 62 bool IsReportingEnabled() const;
52 63
53 Core::System& system; 64 System& system;
54}; 65};
55 66
56} // namespace Core 67} // namespace Core
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 6d32ebea3..0dd1632ac 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -85,7 +85,6 @@ void LogSettings() {
85 LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); 85 LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
86 LogSetting("System_CurrentUser", Settings::values.current_user); 86 LogSetting("System_CurrentUser", Settings::values.current_user);
87 LogSetting("System_LanguageIndex", Settings::values.language_index); 87 LogSetting("System_LanguageIndex", Settings::values.language_index);
88 LogSetting("Core_UseCpuJit", Settings::values.use_cpu_jit);
89 LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); 88 LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
90 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); 89 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
91 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 90 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
diff --git a/src/core/settings.h b/src/core/settings.h
index e2ffcaaf7..6638ce8f9 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -378,7 +378,6 @@ struct Values {
378 std::atomic_bool is_device_reload_pending{true}; 378 std::atomic_bool is_device_reload_pending{true};
379 379
380 // Core 380 // Core
381 bool use_cpu_jit;
382 bool use_multi_core; 381 bool use_multi_core;
383 382
384 // Data Storage 383 // Data Storage
@@ -416,6 +415,7 @@ struct Values {
416 bool dump_exefs; 415 bool dump_exefs;
417 bool dump_nso; 416 bool dump_nso;
418 bool reporting_services; 417 bool reporting_services;
418 bool quest_flag;
419 419
420 // WebService 420 // WebService
421 bool enable_telemetry; 421 bool enable_telemetry;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 90d06830f..793d102d3 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -168,7 +168,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
168 AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id); 168 AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id);
169 AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching", 169 AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching",
170 Settings::values.enable_audio_stretching); 170 Settings::values.enable_audio_stretching);
171 AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.use_cpu_jit);
172 AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore", 171 AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
173 Settings::values.use_multi_core); 172 Settings::values.use_multi_core);
174 AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor", 173 AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor",
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f8b67cbe1..e2f85c5f1 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,4 +1,7 @@
1add_library(video_core STATIC 1add_library(video_core STATIC
2 buffer_cache/buffer_block.h
3 buffer_cache/buffer_cache.h
4 buffer_cache/map_interval.h
2 dma_pusher.cpp 5 dma_pusher.cpp
3 dma_pusher.h 6 dma_pusher.h
4 debug_utils/debug_utils.cpp 7 debug_utils/debug_utils.cpp
@@ -41,12 +44,10 @@ add_library(video_core STATIC
41 renderer_opengl/gl_buffer_cache.h 44 renderer_opengl/gl_buffer_cache.h
42 renderer_opengl/gl_device.cpp 45 renderer_opengl/gl_device.cpp
43 renderer_opengl/gl_device.h 46 renderer_opengl/gl_device.h
44 renderer_opengl/gl_global_cache.cpp 47 renderer_opengl/gl_framebuffer_cache.cpp
45 renderer_opengl/gl_global_cache.h 48 renderer_opengl/gl_framebuffer_cache.h
46 renderer_opengl/gl_rasterizer.cpp 49 renderer_opengl/gl_rasterizer.cpp
47 renderer_opengl/gl_rasterizer.h 50 renderer_opengl/gl_rasterizer.h
48 renderer_opengl/gl_rasterizer_cache.cpp
49 renderer_opengl/gl_rasterizer_cache.h
50 renderer_opengl/gl_resource_manager.cpp 51 renderer_opengl/gl_resource_manager.cpp
51 renderer_opengl/gl_resource_manager.h 52 renderer_opengl/gl_resource_manager.h
52 renderer_opengl/gl_sampler_cache.cpp 53 renderer_opengl/gl_sampler_cache.cpp
@@ -67,6 +68,8 @@ add_library(video_core STATIC
67 renderer_opengl/gl_state.h 68 renderer_opengl/gl_state.h
68 renderer_opengl/gl_stream_buffer.cpp 69 renderer_opengl/gl_stream_buffer.cpp
69 renderer_opengl/gl_stream_buffer.h 70 renderer_opengl/gl_stream_buffer.h
71 renderer_opengl/gl_texture_cache.cpp
72 renderer_opengl/gl_texture_cache.h
70 renderer_opengl/maxwell_to_gl.h 73 renderer_opengl/maxwell_to_gl.h
71 renderer_opengl/renderer_opengl.cpp 74 renderer_opengl/renderer_opengl.cpp
72 renderer_opengl/renderer_opengl.h 75 renderer_opengl/renderer_opengl.h
@@ -88,6 +91,7 @@ add_library(video_core STATIC
88 shader/decode/conversion.cpp 91 shader/decode/conversion.cpp
89 shader/decode/memory.cpp 92 shader/decode/memory.cpp
90 shader/decode/texture.cpp 93 shader/decode/texture.cpp
94 shader/decode/image.cpp
91 shader/decode/float_set_predicate.cpp 95 shader/decode/float_set_predicate.cpp
92 shader/decode/integer_set_predicate.cpp 96 shader/decode/integer_set_predicate.cpp
93 shader/decode/half_set_predicate.cpp 97 shader/decode/half_set_predicate.cpp
@@ -98,8 +102,11 @@ add_library(video_core STATIC
98 shader/decode/integer_set.cpp 102 shader/decode/integer_set.cpp
99 shader/decode/half_set.cpp 103 shader/decode/half_set.cpp
100 shader/decode/video.cpp 104 shader/decode/video.cpp
105 shader/decode/warp.cpp
101 shader/decode/xmad.cpp 106 shader/decode/xmad.cpp
102 shader/decode/other.cpp 107 shader/decode/other.cpp
108 shader/control_flow.cpp
109 shader/control_flow.h
103 shader/decode.cpp 110 shader/decode.cpp
104 shader/node_helper.cpp 111 shader/node_helper.cpp
105 shader/node_helper.h 112 shader/node_helper.h
@@ -109,6 +116,13 @@ add_library(video_core STATIC
109 shader/track.cpp 116 shader/track.cpp
110 surface.cpp 117 surface.cpp
111 surface.h 118 surface.h
119 texture_cache/surface_base.cpp
120 texture_cache/surface_base.h
121 texture_cache/surface_params.cpp
122 texture_cache/surface_params.h
123 texture_cache/surface_view.cpp
124 texture_cache/surface_view.h
125 texture_cache/texture_cache.h
112 textures/astc.cpp 126 textures/astc.cpp
113 textures/astc.h 127 textures/astc.h
114 textures/convert.cpp 128 textures/convert.cpp
@@ -116,8 +130,6 @@ add_library(video_core STATIC
116 textures/decoders.cpp 130 textures/decoders.cpp
117 textures/decoders.h 131 textures/decoders.h
118 textures/texture.h 132 textures/texture.h
119 texture_cache.cpp
120 texture_cache.h
121 video_core.cpp 133 video_core.cpp
122 video_core.h 134 video_core.h
123) 135)
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
new file mode 100644
index 000000000..4b9193182
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -0,0 +1,76 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_set>
8#include <utility>
9
10#include "common/alignment.h"
11#include "common/common_types.h"
12#include "video_core/gpu.h"
13
14namespace VideoCommon {
15
16class BufferBlock {
17public:
18 bool Overlaps(const CacheAddr start, const CacheAddr end) const {
19 return (cache_addr < end) && (cache_addr_end > start);
20 }
21
22 bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
23 return cache_addr <= other_start && other_end <= cache_addr_end;
24 }
25
26 u8* GetWritableHostPtr() const {
27 return FromCacheAddr(cache_addr);
28 }
29
30 u8* GetWritableHostPtr(std::size_t offset) const {
31 return FromCacheAddr(cache_addr + offset);
32 }
33
34 std::size_t GetOffset(const CacheAddr in_addr) {
35 return static_cast<std::size_t>(in_addr - cache_addr);
36 }
37
38 CacheAddr GetCacheAddr() const {
39 return cache_addr;
40 }
41
42 CacheAddr GetCacheAddrEnd() const {
43 return cache_addr_end;
44 }
45
46 void SetCacheAddr(const CacheAddr new_addr) {
47 cache_addr = new_addr;
48 cache_addr_end = new_addr + size;
49 }
50
51 std::size_t GetSize() const {
52 return size;
53 }
54
55 void SetEpoch(u64 new_epoch) {
56 epoch = new_epoch;
57 }
58
59 u64 GetEpoch() {
60 return epoch;
61 }
62
63protected:
64 explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
65 SetCacheAddr(cache_addr);
66 }
67 ~BufferBlock() = default;
68
69private:
70 CacheAddr cache_addr{};
71 CacheAddr cache_addr_end{};
72 std::size_t size{};
73 u64 epoch{};
74};
75
76} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
new file mode 100644
index 000000000..2442ddfd6
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -0,0 +1,447 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector>
14
15#include "common/alignment.h"
16#include "common/common_types.h"
17#include "core/core.h"
18#include "video_core/buffer_cache/buffer_block.h"
19#include "video_core/buffer_cache/map_interval.h"
20#include "video_core/memory_manager.h"
21#include "video_core/rasterizer_interface.h"
22
23namespace VideoCommon {
24
25using MapInterval = std::shared_ptr<MapIntervalBase>;
26
27template <typename TBuffer, typename TBufferType, typename StreamBuffer>
28class BufferCache {
29public:
30 using BufferInfo = std::pair<const TBufferType*, u64>;
31
32 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
33 bool is_written = false) {
34 std::lock_guard lock{mutex};
35
36 auto& memory_manager = system.GPU().MemoryManager();
37 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
38 if (!host_ptr) {
39 return {GetEmptyBuffer(size), 0};
40 }
41 const auto cache_addr = ToCacheAddr(host_ptr);
42
43 // Cache management is a big overhead, so only cache entries with a given size.
44 // TODO: Figure out which size is the best for given games.
45 constexpr std::size_t max_stream_size = 0x800;
46 if (size < max_stream_size) {
47 if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
48 return StreamBufferUpload(host_ptr, size, alignment);
49 }
50 }
51
52 auto block = GetBlock(cache_addr, size);
53 auto map = MapAddress(block, gpu_addr, cache_addr, size);
54 if (is_written) {
55 map->MarkAsModified(true, GetModifiedTicks());
56 if (!map->IsWritten()) {
57 map->MarkAsWritten(true);
58 MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
59 }
60 } else {
61 if (map->IsWritten()) {
62 WriteBarrier();
63 }
64 }
65
66 const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));
67
68 return {ToHandle(block), offset};
69 }
70
71 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
72 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
73 std::size_t alignment = 4) {
74 std::lock_guard lock{mutex};
75 return StreamBufferUpload(raw_pointer, size, alignment);
76 }
77
78 void Map(std::size_t max_size) {
79 std::lock_guard lock{mutex};
80
81 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
82 buffer_offset = buffer_offset_base;
83 }
84
85 /// Finishes the upload stream, returns true on bindings invalidation.
86 bool Unmap() {
87 std::lock_guard lock{mutex};
88
89 stream_buffer->Unmap(buffer_offset - buffer_offset_base);
90 return std::exchange(invalidated, false);
91 }
92
93 void TickFrame() {
94 ++epoch;
95 while (!pending_destruction.empty()) {
96 if (pending_destruction.front()->GetEpoch() + 1 > epoch) {
97 break;
98 }
99 pending_destruction.pop_front();
100 }
101 }
102
103 /// Write any cached resources overlapping the specified region back to memory
104 void FlushRegion(CacheAddr addr, std::size_t size) {
105 std::lock_guard lock{mutex};
106
107 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
108 std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) {
109 return a->GetModificationTick() < b->GetModificationTick();
110 });
111 for (auto& object : objects) {
112 if (object->IsModified() && object->IsRegistered()) {
113 FlushMap(object);
114 }
115 }
116 }
117
118 /// Mark the specified region as being invalidated
119 void InvalidateRegion(CacheAddr addr, u64 size) {
120 std::lock_guard lock{mutex};
121
122 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
123 for (auto& object : objects) {
124 if (object->IsRegistered()) {
125 Unregister(object);
126 }
127 }
128 }
129
       /// Returns a backend buffer usable for binding an empty range of the given size.
130    virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0;
131
132protected:
       // Takes ownership of the stream buffer; caches its native handle so
       // StreamBufferUpload can return it without an extra virtual call.
133    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
134                         std::unique_ptr<StreamBuffer> stream_buffer)
135        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
136          stream_buffer_handle{this->stream_buffer->GetHandle()} {}
137
       // Non-virtual: derived caches are expected to be destroyed through
       // their concrete type, not through a BufferCache pointer.
138    ~BufferCache() = default;
139
       /// Converts cache storage into a bindable backend handle.
140    virtual const TBufferType* ToHandle(const TBuffer& storage) = 0;
141
       /// Inserts any memory barrier the backend needs after buffer writes.
142    virtual void WriteBarrier() = 0;
143
       /// Allocates backend storage covering [cache_addr, cache_addr + size).
144    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
145
       /// Copies host data into the block at the given byte offset.
146    virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
147                                 const u8* data) = 0;
148
       /// Copies block contents back into host memory at the given byte offset.
149    virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
150                                   u8* data) = 0;
151
       /// Copies size bytes between two blocks (device-to-device).
152    virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
153                           std::size_t dst_offset, std::size_t size) = 0;
155    /// Register an object into the cache
156    void Register(const MapInterval& new_map, bool inherit_written = false) {
157        const CacheAddr cache_ptr = new_map->GetStart();
158        const std::optional<VAddr> cpu_addr =
159            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
           // A null cache pointer or an unmapped GPU address cannot be tracked;
           // log and bail out without registering (best-effort, not fatal here).
160        if (!cache_ptr || !cpu_addr) {
161            LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
162                         new_map->GetGpuAddress());
163            return;
164        }
165        const std::size_t size = new_map->GetEnd() - new_map->GetStart();
166        new_map->SetCpuAddress(*cpu_addr);
167        new_map->MarkAsRegistered(true);
168        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
169        mapped_addresses.insert({interval, new_map});
           // Tell the rasterizer these CPU pages are now cached (+1 refcount).
170        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
           // When replacing maps that were GPU-written, the new map inherits
           // the written state so IsRegionWritten keeps reporting it.
171        if (inherit_written) {
172            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
173            new_map->MarkAsWritten(true);
174        }
175    }
176
177    /// Unregisters an object from the cache
178    void Unregister(MapInterval& map) {
179        const std::size_t size = map->GetEnd() - map->GetStart();
           // Mirror of Register: drop the cached-pages refcount (-1).
180        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
181        map->MarkAsRegistered(false);
           // Written maps also hold per-page write marks that must be released.
182        if (map->IsWritten()) {
183            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
184        }
185        const IntervalType delete_interval{map->GetStart(), map->GetEnd()};
186        mapped_addresses.erase(delete_interval);
187    }
188
189private:
       /// Allocates a shared map-interval bookkeeping object for [start, end).
190    MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
191        return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
192    }
193
       /// Maps [cache_addr, cache_addr + size) into the given block, coalescing any
       /// existing overlapping maps into one larger map. Returns the resulting map.
194    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
195                           const CacheAddr cache_addr, const std::size_t size) {
196
197        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
           // Fast path: no overlap — upload the data and register a fresh map.
198        if (overlaps.empty()) {
199            const CacheAddr cache_addr_end = cache_addr + size;
200            MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
201            u8* host_ptr = FromCacheAddr(cache_addr);
202            UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
203            Register(new_map);
204            return new_map;
205        }
206
207        const CacheAddr cache_addr_end = cache_addr + size;
           // Fast path: a single overlap that fully contains the request —
           // reuse it as-is, nothing to upload.
208        if (overlaps.size() == 1) {
209            MapInterval& current_map = overlaps[0];
210            if (current_map->IsInside(cache_addr, cache_addr_end)) {
211                return current_map;
212            }
213        }
           // Slow path: merge the request with all overlaps into one interval,
           // inheriting the written/modified state of any absorbed map.
214        CacheAddr new_start = cache_addr;
215        CacheAddr new_end = cache_addr_end;
216        bool write_inheritance = false;
217        bool modified_inheritance = false;
218        // Calculate new buffer parameters
219        for (auto& overlap : overlaps) {
220            new_start = std::min(overlap->GetStart(), new_start);
221            new_end = std::max(overlap->GetEnd(), new_end);
222            write_inheritance |= overlap->IsWritten();
223            modified_inheritance |= overlap->IsModified();
224        }
           // GPU address shifts by the same amount the start moved backwards.
225        GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
226        for (auto& overlap : overlaps) {
227            Unregister(overlap);
228        }
           // Upload only the sub-ranges not already covered by the old overlaps.
229        UpdateBlock(block, new_start, new_end, overlaps);
230        MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
231        if (modified_inheritance) {
232            new_map->MarkAsModified(true, GetModifiedTicks());
233        }
234        Register(new_map, write_inheritance);
235        return new_map;
236    }
237
       /// Uploads host data for [start, end) into the block, skipping every
       /// sub-range already covered by one of the given (previous) overlaps.
238    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
239                     std::vector<MapInterval>& overlaps) {
           // Start from the whole interval and subtract each overlap; what
           // remains in the interval set is exactly the data not yet resident.
240        const IntervalType base_interval{start, end};
241        IntervalSet interval_set{};
242        interval_set.add(base_interval);
243        for (auto& overlap : overlaps) {
244            const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()};
245            interval_set.subtract(subtract);
246        }
247        for (auto& interval : interval_set) {
248            std::size_t size = interval.upper() - interval.lower();
249            if (size > 0) {
250                u8* host_ptr = FromCacheAddr(interval.lower());
251                UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
252            }
253        }
254    }
255
       /// Collects every registered map overlapping [addr, addr + size).
       /// Returns an empty vector for a zero-sized query.
256    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
257        if (size == 0) {
258            return {};
259        }
260
261        std::vector<MapInterval> objects{};
262        const IntervalType interval{addr, addr + size};
           // interval_map::equal_range yields all stored intervals that
           // intersect the query interval.
263        for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) {
264            objects.push_back(pair.second);
265        }
266
267        return objects;
268    }
269
270    /// Returns a ticks counter used for tracking when cached objects were last modified
271    u64 GetModifiedTicks() {
           // Monotonically increasing; pre-increment so 0 can mean "never".
272        return ++modified_ticks;
273    }
274
       /// Downloads a single map's contents back to host memory and clears its
       /// modified flag. Assumes the whole map lives inside one block — the
       /// block is looked up from the map's start page only; TODO confirm maps
       /// never straddle block boundaries.
275    void FlushMap(MapInterval map) {
276        std::size_t size = map->GetEnd() - map->GetStart();
277        TBuffer block = blocks[map->GetStart() >> block_page_bits];
278        u8* host_ptr = FromCacheAddr(map->GetStart());
279        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
280        map->MarkAsModified(false, 0);
281    }
282
       /// Copies raw data into the stream buffer at the next aligned offset and
       /// returns the handle/offset pair to bind. Caller must have Map()ed first.
283    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
284                                  std::size_t alignment) {
285        AlignBuffer(alignment);
286        const std::size_t uploaded_offset = buffer_offset;
287        std::memcpy(buffer_ptr, raw_pointer, size);
288
289        buffer_ptr += size;
290        buffer_offset += size;
291        return {&stream_buffer_handle, uploaded_offset};
292    }
293
       /// Advances the write cursor so buffer_offset is aligned; the mapped
       /// pointer is moved by the same delta to stay in sync with the offset.
294    void AlignBuffer(std::size_t alignment) {
295        // Align the offset, not the mapped pointer
296        const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
297        buffer_ptr += offset_aligned - buffer_offset;
298        buffer_offset = offset_aligned;
299    }
300
       /// Grows a block by one page: allocates a larger block at the same cache
       /// address, copies the old contents, retires the old block, and repoints
       /// every page-table entry in the new range at the new block.
301    TBuffer EnlargeBlock(TBuffer buffer) {
302        const std::size_t old_size = buffer->GetSize();
303        const std::size_t new_size = old_size + block_page_size;
304        const CacheAddr cache_addr = buffer->GetCacheAddr();
305        TBuffer new_buffer = CreateBlock(cache_addr, new_size);
306        CopyBlock(buffer, new_buffer, 0, 0, old_size);
           // Defer destruction until TickFrame decides the GPU is done with it.
307        buffer->SetEpoch(epoch);
308        pending_destruction.push_back(buffer);
309        const CacheAddr cache_addr_end = cache_addr + new_size - 1;
310        u64 page_start = cache_addr >> block_page_bits;
311        const u64 page_end = cache_addr_end >> block_page_bits;
312        while (page_start <= page_end) {
313            blocks[page_start] = new_buffer;
314            ++page_start;
315        }
316        return new_buffer;
317    }
318
       /// Merges two blocks into one sized for both, copying each block's data
       /// to its offset inside the new block, retiring both originals, and
       /// repointing the page table over the merged range.
       /// NOTE(review): new_size = size_1 + size_2 assumes the blocks are
       /// adjacent and non-overlapping — confirm callers guarantee this.
319    TBuffer MergeBlocks(TBuffer first, TBuffer second) {
320        const std::size_t size_1 = first->GetSize();
321        const std::size_t size_2 = second->GetSize();
322        const CacheAddr first_addr = first->GetCacheAddr();
323        const CacheAddr second_addr = second->GetCacheAddr();
324        const CacheAddr new_addr = std::min(first_addr, second_addr);
325        const std::size_t new_size = size_1 + size_2;
326        TBuffer new_buffer = CreateBlock(new_addr, new_size);
327        CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
328        CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
           // Defer destruction of both sources until a frame has passed.
329        first->SetEpoch(epoch);
330        second->SetEpoch(epoch);
331        pending_destruction.push_back(first);
332        pending_destruction.push_back(second);
333        const CacheAddr cache_addr_end = new_addr + new_size - 1;
334        u64 page_start = new_addr >> block_page_bits;
335        const u64 page_end = cache_addr_end >> block_page_bits;
336        while (page_start <= page_end) {
337            blocks[page_start] = new_buffer;
338            ++page_start;
339        }
340        return new_buffer;
341    }
342
       /// Returns a single block covering [cache_addr, cache_addr + size),
       /// walking the page table and creating/enlarging/merging blocks as
       /// needed so the whole range ends up backed by one block.
343    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
344        TBuffer found{};
345        const CacheAddr cache_addr_end = cache_addr + size - 1;
346        u64 page_start = cache_addr >> block_page_bits;
347        const u64 page_end = cache_addr_end >> block_page_bits;
348        while (page_start <= page_end) {
349            auto it = blocks.find(page_start);
350            if (it == blocks.end()) {
                   // Unmapped page: extend the running block, or start a fresh
                   // one-page block if this is the first page visited.
351                if (found) {
352                    found = EnlargeBlock(found);
353                } else {
354                    const CacheAddr start_addr = (page_start << block_page_bits);
355                    found = CreateBlock(start_addr, block_page_size);
356                    blocks[page_start] = found;
357                }
358            } else {
                   // Mapped page: either it's already our block (skip), it's a
                   // different block (merge), or it becomes our running block.
359                if (found) {
360                    if (found == it->second) {
361                        ++page_start;
362                        continue;
363                    }
364                    found = MergeBlocks(found, it->second);
365                } else {
366                    found = it->second;
367                }
368            }
369            ++page_start;
370        }
371        return found;
372    }
373
       /// Increments the written refcount of every page touched by [start, end]
       /// (end is inclusive; callers pass GetEnd() - 1).
374    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
375        u64 page_start = start >> write_page_bit;
376        const u64 page_end = end >> write_page_bit;
377        while (page_start <= page_end) {
378            auto it = written_pages.find(page_start);
379            if (it != written_pages.end()) {
380                it->second = it->second + 1;
381            } else {
382                written_pages[page_start] = 1;
383            }
384            page_start++;
385        }
386    }
387
       /// Decrements the written refcount of every page in [start, end],
       /// erasing entries that drop to zero. Inverse of MarkRegionAsWritten.
388    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
389        u64 page_start = start >> write_page_bit;
390        const u64 page_end = end >> write_page_bit;
391        while (page_start <= page_end) {
392            auto it = written_pages.find(page_start);
393            if (it != written_pages.end()) {
394                if (it->second > 1) {
395                    it->second = it->second - 1;
396                } else {
397                    written_pages.erase(it);
398                }
399            }
400            page_start++;
401        }
402    }
403
       /// Returns true when any page in [start, end] has a nonzero written
       /// refcount (page-granular, so this may over-approximate the range).
404    bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
405        u64 page_start = start >> write_page_bit;
406        const u64 page_end = end >> write_page_bit;
407        while (page_start <= page_end) {
408            if (written_pages.count(page_start) > 0) {
409                return true;
410            }
411            page_start++;
412        }
413        return false;
414    }
415
416    VideoCore::RasterizerInterface& rasterizer;
417    Core::System& system;
418    std::unique_ptr<StreamBuffer> stream_buffer;
419
       // Native handle of stream_buffer, cached at construction.
420    TBufferType stream_buffer_handle{};
421
       // Set when bindings must be refreshed; consumed (reset) by Unmap.
422    bool invalidated = false;
423
       // Stream-buffer write cursor: host pointer, absolute offset, and the
       // offset recorded when the buffer was mapped (Unmap flushes the delta).
424    u8* buffer_ptr = nullptr;
425    u64 buffer_offset = 0;
426    u64 buffer_offset_base = 0;
427
       // Interval map from cache-address ranges to their MapInterval objects.
428    using IntervalSet = boost::icl::interval_set<CacheAddr>;
429    using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
430    using IntervalType = typename IntervalCache::interval_type;
431    IntervalCache mapped_addresses{};
432
       // Refcounted GPU-written marks at 2 KiB (1 << 11) page granularity.
433    static constexpr u64 write_page_bit{11};
434    std::unordered_map<u64, u32> written_pages{};
435
       // Block page table: 2 MiB (1 << 21) pages mapped to backing blocks.
436    static constexpr u64 block_page_bits{21};
437    static constexpr u64 block_page_size{1 << block_page_bits};
438    std::unordered_map<u64, TBuffer> blocks{};
439
       // Blocks retired by Enlarge/Merge, freed by TickFrame after one epoch.
440    std::list<TBuffer> pending_destruction{};
441    u64 epoch{};
442    u64 modified_ticks{};
443
       // Recursive because public entry points that lock may call each other.
444    std::recursive_mutex mutex;
445};
446
447} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
new file mode 100644
index 000000000..3a104d5cd
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -0,0 +1,89 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/gpu.h"
9
10namespace VideoCommon {
11
// Bookkeeping for one mapped range [start, end) of the buffer cache: its GPU
// and CPU addresses plus registered/modified/written state and a modification
// tick used to order flushes.
12class MapIntervalBase {
13public:
14    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
15        : start{start}, end{end}, gpu_addr{gpu_addr} {}
16
      /// Stores the CPU address resolved at registration time.
17    void SetCpuAddress(VAddr new_cpu_addr) {
18        cpu_addr = new_cpu_addr;
19    }
20
21    VAddr GetCpuAddress() const {
22        return cpu_addr;
23    }
24
25    GPUVAddr GetGpuAddress() const {
26        return gpu_addr;
27    }
28
      /// Returns true when [other_start, other_end] lies entirely within this map.
29    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
30        return (start <= other_start && other_end <= end);
31    }
32
      // Equality compares only the interval bounds, not addresses or flags.
33    bool operator==(const MapIntervalBase& rhs) const {
34        return std::tie(start, end) == std::tie(rhs.start, rhs.end);
35    }
36
37    bool operator!=(const MapIntervalBase& rhs) const {
38        return !operator==(rhs);
39    }
40
41    void MarkAsRegistered(const bool registered) {
42        is_registered = registered;
43    }
44
45    bool IsRegistered() const {
46        return is_registered;
47    }
48
49    CacheAddr GetStart() const {
50        return start;
51    }
52
53    CacheAddr GetEnd() const {
54        return end;
55    }
56
      /// Sets the modified flag and records the tick of the change; callers
      /// pass tick 0 when clearing the flag after a flush.
57    void MarkAsModified(const bool is_modified_, const u64 tick) {
58        is_modified = is_modified_;
59        ticks = tick;
60    }
61
62    bool IsModified() const {
63        return is_modified;
64    }
65
66    u64 GetModificationTick() const {
67        return ticks;
68    }
69
70    void MarkAsWritten(const bool is_written_) {
71        is_written = is_written_;
72    }
73
74    bool IsWritten() const {
75        return is_written;
76    }
77
78private:
      // Interval bounds in cache-address space; end is exclusive per IsInside.
79    CacheAddr start;
80    CacheAddr end;
81    GPUVAddr gpu_addr;
82    VAddr cpu_addr{};
83    bool is_written{};
84    bool is_modified{};
85    bool is_registered{};
      // Modification tick from BufferCache::GetModifiedTicks; 0 = flushed/never.
86    u64 ticks{};
87};
88
89} // namespace VideoCommon
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 3175579cc..0094fd715 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
22 MICROPROFILE_SCOPE(DispatchCalls); 22 MICROPROFILE_SCOPE(DispatchCalls);
23 23
24 // On entering GPU code, assume all memory may be touched by the ARM core. 24 // On entering GPU code, assume all memory may be touched by the ARM core.
25 gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); 25 gpu.Maxwell3D().dirty.OnMemoryWrite();
26 26
27 dma_pushbuffer_subindex = 0; 27 dma_pushbuffer_subindex = 0;
28 28
@@ -31,6 +31,7 @@ void DmaPusher::DispatchCalls() {
31 break; 31 break;
32 } 32 }
33 } 33 }
34 gpu.FlushCommands();
34} 35}
35 36
36bool DmaPusher::Step() { 37bool DmaPusher::Step() {
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index 082a40cd9..d44ad0cd8 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -36,10 +36,10 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
36 } else { 36 } else {
37 UNIMPLEMENTED_IF(regs.dest.z != 0); 37 UNIMPLEMENTED_IF(regs.dest.z != 0);
38 UNIMPLEMENTED_IF(regs.dest.depth != 1); 38 UNIMPLEMENTED_IF(regs.dest.depth != 1);
39 UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); 39 UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0);
40 UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); 40 UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0);
41 const std::size_t dst_size = Tegra::Texture::CalculateSize( 41 const std::size_t dst_size = Tegra::Texture::CalculateSize(
42 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); 42 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0);
43 tmp_buffer.resize(dst_size); 43 tmp_buffer.resize(dst_size);
44 memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); 44 memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
45 Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, 45 Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index ef4f5839a..462da419e 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -39,15 +39,15 @@ struct Registers {
39 } 39 }
40 40
41 u32 BlockWidth() const { 41 u32 BlockWidth() const {
42 return 1U << block_width.Value(); 42 return block_width.Value();
43 } 43 }
44 44
45 u32 BlockHeight() const { 45 u32 BlockHeight() const {
46 return 1U << block_height.Value(); 46 return block_height.Value();
47 } 47 }
48 48
49 u32 BlockDepth() const { 49 u32 BlockDepth() const {
50 return 1U << block_depth.Value(); 50 return block_depth.Value();
51 } 51 }
52 } dest; 52 } dest;
53}; 53};
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 55966eef1..98a8b5337 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -4,15 +4,13 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "common/math_util.h"
8#include "video_core/engines/fermi_2d.h" 7#include "video_core/engines/fermi_2d.h"
9#include "video_core/memory_manager.h" 8#include "video_core/memory_manager.h"
10#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
11 10
12namespace Tegra::Engines { 11namespace Tegra::Engines {
13 12
14Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 13Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
15 : rasterizer{rasterizer}, memory_manager{memory_manager} {}
16 14
17void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { 15void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
18 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 16 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -35,21 +33,31 @@ void Fermi2D::HandleSurfaceCopy() {
35 static_cast<u32>(regs.operation)); 33 static_cast<u32>(regs.operation));
36 34
37 // TODO(Subv): Only raw copies are implemented. 35 // TODO(Subv): Only raw copies are implemented.
38 ASSERT(regs.operation == Regs::Operation::SrcCopy); 36 ASSERT(regs.operation == Operation::SrcCopy);
39 37
40 const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)}; 38 const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
41 const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)}; 39 const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
42 const u32 src_blit_x2{ 40 u32 src_blit_x2, src_blit_y2;
43 static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)}; 41 if (regs.blit_control.origin == Origin::Corner) {
44 const u32 src_blit_y2{ 42 src_blit_x2 =
45 static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; 43 static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
46 44 src_blit_y2 =
45 static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
46 } else {
47 src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
48 src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
49 }
47 const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; 50 const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
48 const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, 51 const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
49 regs.blit_dst_x + regs.blit_dst_width, 52 regs.blit_dst_x + regs.blit_dst_width,
50 regs.blit_dst_y + regs.blit_dst_height}; 53 regs.blit_dst_y + regs.blit_dst_height};
54 Config copy_config;
55 copy_config.operation = regs.operation;
56 copy_config.filter = regs.blit_control.filter;
57 copy_config.src_rect = src_rect;
58 copy_config.dst_rect = dst_rect;
51 59
52 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { 60 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
53 UNIMPLEMENTED(); 61 UNIMPLEMENTED();
54 } 62 }
55} 63}
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 45f59a4d9..0901cf2fa 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -9,6 +9,7 @@
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/math_util.h"
12#include "video_core/gpu.h" 13#include "video_core/gpu.h"
13 14
14namespace Tegra { 15namespace Tegra {
@@ -32,12 +33,32 @@ namespace Tegra::Engines {
32 33
33class Fermi2D final { 34class Fermi2D final {
34public: 35public:
35 explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); 36 explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
36 ~Fermi2D() = default; 37 ~Fermi2D() = default;
37 38
38 /// Write the value to the register identified by method. 39 /// Write the value to the register identified by method.
39 void CallMethod(const GPU::MethodCall& method_call); 40 void CallMethod(const GPU::MethodCall& method_call);
40 41
42 enum class Origin : u32 {
43 Center = 0,
44 Corner = 1,
45 };
46
47 enum class Filter : u32 {
48 PointSample = 0, // Nearest
49 Linear = 1,
50 };
51
52 enum class Operation : u32 {
53 SrcCopyAnd = 0,
54 ROPAnd = 1,
55 Blend = 2,
56 SrcCopy = 3,
57 ROP = 4,
58 SrcCopyPremult = 5,
59 BlendPremult = 6,
60 };
61
41 struct Regs { 62 struct Regs {
42 static constexpr std::size_t NUM_REGS = 0x258; 63 static constexpr std::size_t NUM_REGS = 0x258;
43 64
@@ -63,32 +84,19 @@ public:
63 } 84 }
64 85
65 u32 BlockWidth() const { 86 u32 BlockWidth() const {
66 // The block width is stored in log2 format. 87 return block_width.Value();
67 return 1 << block_width;
68 } 88 }
69 89
70 u32 BlockHeight() const { 90 u32 BlockHeight() const {
71 // The block height is stored in log2 format. 91 return block_height.Value();
72 return 1 << block_height;
73 } 92 }
74 93
75 u32 BlockDepth() const { 94 u32 BlockDepth() const {
76 // The block depth is stored in log2 format. 95 return block_depth.Value();
77 return 1 << block_depth;
78 } 96 }
79 }; 97 };
80 static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); 98 static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
81 99
82 enum class Operation : u32 {
83 SrcCopyAnd = 0,
84 ROPAnd = 1,
85 Blend = 2,
86 SrcCopy = 3,
87 ROP = 4,
88 SrcCopyPremult = 5,
89 BlendPremult = 6,
90 };
91
92 union { 100 union {
93 struct { 101 struct {
94 INSERT_PADDING_WORDS(0x80); 102 INSERT_PADDING_WORDS(0x80);
@@ -105,7 +113,11 @@ public:
105 113
106 INSERT_PADDING_WORDS(0x177); 114 INSERT_PADDING_WORDS(0x177);
107 115
108 u32 blit_control; 116 union {
117 u32 raw;
118 BitField<0, 1, Origin> origin;
119 BitField<4, 1, Filter> filter;
120 } blit_control;
109 121
110 INSERT_PADDING_WORDS(0x8); 122 INSERT_PADDING_WORDS(0x8);
111 123
@@ -124,9 +136,15 @@ public:
124 }; 136 };
125 } regs{}; 137 } regs{};
126 138
139 struct Config {
140 Operation operation;
141 Filter filter;
142 Common::Rectangle<u32> src_rect;
143 Common::Rectangle<u32> dst_rect;
144 };
145
127private: 146private:
128 VideoCore::RasterizerInterface& rasterizer; 147 VideoCore::RasterizerInterface& rasterizer;
129 MemoryManager& memory_manager;
130 148
131 /// Performs the copy from the source surface to the destination surface as configured in the 149 /// Performs the copy from the source surface to the destination surface as configured in the
132 /// registers. 150 /// registers.
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 7404a8163..63d449135 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <bitset>
5#include "common/assert.h" 6#include "common/assert.h"
6#include "common/logging/log.h" 7#include "common/logging/log.h"
7#include "core/core.h" 8#include "core/core.h"
@@ -37,7 +38,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
37 const bool is_last_call = method_call.IsLastCall(); 38 const bool is_last_call = method_call.IsLastCall();
38 upload_state.ProcessData(method_call.argument, is_last_call); 39 upload_state.ProcessData(method_call.argument, is_last_call);
39 if (is_last_call) { 40 if (is_last_call) {
40 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 41 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
41 } 42 }
42 break; 43 break;
43 } 44 }
@@ -49,14 +50,67 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
49 } 50 }
50} 51}
51 52
52void KeplerCompute::ProcessLaunch() { 53Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
54 const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
55 ASSERT(cbuf_mask[regs.tex_cb_index]);
56
57 const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index];
58 ASSERT(texinfo.Address() != 0);
59
60 const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle);
61 ASSERT(address < texinfo.Address() + texinfo.size);
62
63 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
64 return GetTextureInfo(tex_handle, offset);
65}
53 66
67Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHandle tex_handle,
68 std::size_t offset) const {
69 return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id),
70 GetTSCEntry(tex_handle.tsc_id)};
71}
72
73u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const {
74 const auto& buffer = launch_description.const_buffer_config[const_buffer];
75 u32 result;
76 std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
77 return result;
78}
79
80void KeplerCompute::ProcessLaunch() {
54 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); 81 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
55 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, 82 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
56 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); 83 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
57 84
58 const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; 85 const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
59 LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); 86 LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
87
88 rasterizer.DispatchCompute(code_addr);
89}
90
91Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
92 const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
93
94 Texture::TICEntry tic_entry;
95 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
96
97 const auto r_type{tic_entry.r_type.Value()};
98 const auto g_type{tic_entry.g_type.Value()};
99 const auto b_type{tic_entry.b_type.Value()};
100 const auto a_type{tic_entry.a_type.Value()};
101
102 // TODO(Subv): Different data types for separate components are not supported
103 DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
104
105 return tic_entry;
106}
107
108Texture::TSCEntry KeplerCompute::GetTSCEntry(u32 tsc_index) const {
109 const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
110
111 Texture::TSCEntry tsc_entry;
112 memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
113 return tsc_entry;
60} 114}
61 115
62} // namespace Tegra::Engines 116} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 6a3309a2c..90cf650d2 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -12,6 +12,7 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/engine_upload.h" 13#include "video_core/engines/engine_upload.h"
14#include "video_core/gpu.h" 14#include "video_core/gpu.h"
15#include "video_core/textures/texture.h"
15 16
16namespace Core { 17namespace Core {
17class System; 18class System;
@@ -111,7 +112,7 @@ public:
111 112
112 INSERT_PADDING_WORDS(0x3FE); 113 INSERT_PADDING_WORDS(0x3FE);
113 114
114 u32 texture_const_buffer_index; 115 u32 tex_cb_index;
115 116
116 INSERT_PADDING_WORDS(0x374); 117 INSERT_PADDING_WORDS(0x374);
117 }; 118 };
@@ -149,7 +150,7 @@ public:
149 union { 150 union {
150 BitField<0, 8, u32> const_buffer_enable_mask; 151 BitField<0, 8, u32> const_buffer_enable_mask;
151 BitField<29, 2, u32> cache_layout; 152 BitField<29, 2, u32> cache_layout;
152 } memory_config; 153 };
153 154
154 INSERT_PADDING_WORDS(0x8); 155 INSERT_PADDING_WORDS(0x8);
155 156
@@ -194,6 +195,14 @@ public:
194 /// Write the value to the register identified by method. 195 /// Write the value to the register identified by method.
195 void CallMethod(const GPU::MethodCall& method_call); 196 void CallMethod(const GPU::MethodCall& method_call);
196 197
198 Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const;
199
200 /// Given a Texture Handle, returns the TSC and TIC entries.
201 Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
202 std::size_t offset) const;
203
204 u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const;
205
197private: 206private:
198 Core::System& system; 207 Core::System& system;
199 VideoCore::RasterizerInterface& rasterizer; 208 VideoCore::RasterizerInterface& rasterizer;
@@ -201,6 +210,12 @@ private:
201 Upload::State upload_state; 210 Upload::State upload_state;
202 211
203 void ProcessLaunch(); 212 void ProcessLaunch();
213
214 /// Retrieves information about a specific TIC entry from the TIC buffer.
215 Texture::TICEntry GetTICEntry(u32 tic_index) const;
216
217 /// Retrieves information about a specific TSC entry from the TSC buffer.
218 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
204}; 219};
205 220
206#define ASSERT_REG_POSITION(field_name, position) \ 221#define ASSERT_REG_POSITION(field_name, position) \
@@ -218,12 +233,12 @@ ASSERT_REG_POSITION(launch, 0xAF);
218ASSERT_REG_POSITION(tsc, 0x557); 233ASSERT_REG_POSITION(tsc, 0x557);
219ASSERT_REG_POSITION(tic, 0x55D); 234ASSERT_REG_POSITION(tic, 0x55D);
220ASSERT_REG_POSITION(code_loc, 0x582); 235ASSERT_REG_POSITION(code_loc, 0x582);
221ASSERT_REG_POSITION(texture_const_buffer_index, 0x982); 236ASSERT_REG_POSITION(tex_cb_index, 0x982);
222ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); 237ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
223ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); 238ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
224ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); 239ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
225ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); 240ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
226ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14); 241ASSERT_LAUNCH_PARAM_POSITION(const_buffer_enable_mask, 0x14);
227ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D); 242ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
228 243
229#undef ASSERT_REG_POSITION 244#undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 0561f676c..fa4a7c5c1 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -15,7 +15,7 @@
15namespace Tegra::Engines { 15namespace Tegra::Engines {
16 16
17KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) 17KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
18 : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} 18 : system{system}, upload_state{memory_manager, regs.upload} {}
19 19
20KeplerMemory::~KeplerMemory() = default; 20KeplerMemory::~KeplerMemory() = default;
21 21
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
34 const bool is_last_call = method_call.IsLastCall(); 34 const bool is_last_call = method_call.IsLastCall();
35 upload_state.ProcessData(method_call.argument, is_last_call); 35 upload_state.ProcessData(method_call.argument, is_last_call);
36 if (is_last_call) { 36 if (is_last_call) {
37 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 37 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
38 } 38 }
39 break; 39 break;
40 } 40 }
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index f3bc675a9..e0e25c321 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -65,7 +65,6 @@ public:
65 65
66private: 66private:
67 Core::System& system; 67 Core::System& system;
68 MemoryManager& memory_manager;
69 Upload::State upload_state; 68 Upload::State upload_state;
70}; 69};
71 70
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 08d553696..c8c92757a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
22 MemoryManager& memory_manager) 22 MemoryManager& memory_manager)
23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, 23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { 24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
25 InitDirtySettings();
25 InitializeRegisterDefaults(); 26 InitializeRegisterDefaults();
26} 27}
27 28
@@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
69 regs.stencil_back_func_mask = 0xFFFFFFFF; 70 regs.stencil_back_func_mask = 0xFFFFFFFF;
70 regs.stencil_back_mask = 0xFFFFFFFF; 71 regs.stencil_back_mask = 0xFFFFFFFF;
71 72
73 regs.depth_test_func = Regs::ComparisonOp::Always;
74 regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
75 regs.cull.cull_face = Regs::Cull::CullFace::Back;
76
72 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a 77 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
73 // register carrying a default value. Assume it's OpenGL's default (1). 78 // register carrying a default value. Assume it's OpenGL's default (1).
74 regs.point_size = 1.0f; 79 regs.point_size = 1.0f;
@@ -86,21 +91,168 @@ void Maxwell3D::InitializeRegisterDefaults() {
86 regs.rt_separate_frag_data = 1; 91 regs.rt_separate_frag_data = 1;
87} 92}
88 93
89void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { 94#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
95
96void Maxwell3D::InitDirtySettings() {
97 const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
98 const auto start_itr = dirty_pointers.begin() + start;
99 const auto end_itr = start_itr + range;
100 std::fill(start_itr, end_itr, position);
101 };
102 dirty.regs.fill(true);
103
104 // Init Render Targets
105 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
106 constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
107 constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
108 u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
109 for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
110 set_block(rt_reg, registers_per_rt, rt_dirty_reg);
111 rt_dirty_reg++;
112 }
113 constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
114 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
115 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
116 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
117 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
118 constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
119 set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
120
121 // Init Vertex Arrays
122 constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
123 constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
124 constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
125 u32 va_reg = DIRTY_REGS_POS(vertex_array);
126 u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
127 for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
128 vertex_reg += vertex_array_size) {
129 set_block(vertex_reg, 3, va_reg);
130 // The divisor concerns vertex array instances
131 dirty_pointers[vertex_reg + 3] = vi_reg;
132 va_reg++;
133 vi_reg++;
134 }
135 constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
136 constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
137 constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
138 va_reg = DIRTY_REGS_POS(vertex_array);
139 for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
140 vertex_reg += vertex_limit_size) {
141 set_block(vertex_reg, vertex_limit_size, va_reg);
142 va_reg++;
143 }
144 constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
145 constexpr u32 vertex_instance_size =
146 sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
147 constexpr u32 vertex_instance_end =
148 vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
149 vi_reg = DIRTY_REGS_POS(vertex_instance);
150 for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
151 vertex_reg += vertex_instance_size) {
152 set_block(vertex_reg, vertex_instance_size, vi_reg);
153 vi_reg++;
154 }
155 set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
156 DIRTY_REGS_POS(vertex_attrib_format));
157
158 // Init Shaders
159 constexpr u32 shader_registers_count =
160 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
161 set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
162 DIRTY_REGS_POS(shaders));
163
164 // State
165
166 // Viewport
167 constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
168 constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
169 constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
170 set_block(viewport_start, viewport_size, viewport_dirty_reg);
171 constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
172 constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
173 set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
174
175 // Viewport transformation
176 constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
177 constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
178 set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
179
180 // Cullmode
181 constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
182 constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
183 set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
184
185 // Screen y control
186 dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
187
188 // Primitive Restart
189 constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
190 constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
191 set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
192
193 // Depth Test
194 constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
195 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
196 dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
197 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
198
199 // Stencil Test
200 constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
201 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
202 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
203 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
204 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
205 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
206 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
207 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
208 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
209 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
210 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
211 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
212 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
213 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
214 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
215 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
216 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
217
218 // Color Mask
219 constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
220 dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
221 set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
222 color_mask_dirty_reg);
223 // Blend State
224 constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
225 set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
226 blend_state_dirty_reg);
227 dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
228 set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
229 set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
230 blend_state_dirty_reg);
231
232 // Scissor State
233 constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
234 set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
235 scissor_test_dirty_reg);
236
237 // Polygon Offset
238 constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
239 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
240 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
241 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
242 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
243 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
244 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
245}
246
247void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
90 // Reset the current macro. 248 // Reset the current macro.
91 executing_macro = 0; 249 executing_macro = 0;
92 250
93 // Lookup the macro offset 251 // Lookup the macro offset
94 const u32 entry{(method - MacroRegistersStart) >> 1}; 252 const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size();
95 const auto& search{macro_offsets.find(entry)};
96 if (search == macro_offsets.end()) {
97 LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
98 UNREACHABLE();
99 return;
100 }
101 253
102 // Execute the current macro. 254 // Execute the current macro.
103 macro_interpreter.Execute(search->second, std::move(parameters)); 255 macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters);
104} 256}
105 257
106void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { 258void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
@@ -108,6 +260,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
108 260
109 const u32 method = method_call.method; 261 const u32 method = method_call.method;
110 262
263 if (method == cb_data_state.current) {
264 regs.reg_array[method] = method_call.argument;
265 ProcessCBData(method_call.argument);
266 return;
267 } else if (cb_data_state.current != null_cb_data) {
268 FinishCBData();
269 }
270
111 // It is an error to write to a register other than the current macro's ARG register before it 271 // It is an error to write to a register other than the current macro's ARG register before it
112 // has finished execution. 272 // has finished execution.
113 if (executing_macro != 0) { 273 if (executing_macro != 0) {
@@ -129,7 +289,8 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
129 289
130 // Call the macro when there are no more parameters in the command buffer 290 // Call the macro when there are no more parameters in the command buffer
131 if (method_call.IsLastCall()) { 291 if (method_call.IsLastCall()) {
132 CallMacroMethod(executing_macro, std::move(macro_params)); 292 CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
293 macro_params.clear();
133 } 294 }
134 return; 295 return;
135 } 296 }
@@ -143,49 +304,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
143 304
144 if (regs.reg_array[method] != method_call.argument) { 305 if (regs.reg_array[method] != method_call.argument) {
145 regs.reg_array[method] = method_call.argument; 306 regs.reg_array[method] = method_call.argument;
146 // Color buffers 307 const std::size_t dirty_reg = dirty_pointers[method];
147 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); 308 if (dirty_reg) {
148 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 309 dirty.regs[dirty_reg] = true;
149 if (method >= first_rt_reg && 310 if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
150 method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { 311 dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
151 const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; 312 dirty.vertex_array_buffers = true;
152 dirty_flags.color_buffer.set(rt_index); 313 } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
153 } 314 dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
154 315 dirty.vertex_instances = true;
155 // Zeta buffer 316 } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
156 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); 317 dirty_reg < DIRTY_REGS_POS(render_settings)) {
157 if (method == MAXWELL3D_REG_INDEX(zeta_enable) || 318 dirty.render_settings = true;
158 method == MAXWELL3D_REG_INDEX(zeta_width) || 319 }
159 method == MAXWELL3D_REG_INDEX(zeta_height) ||
160 (method >= MAXWELL3D_REG_INDEX(zeta) &&
161 method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
162 dirty_flags.zeta_buffer = true;
163 }
164
165 // Shader
166 constexpr u32 shader_registers_count =
167 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
168 if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
169 method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
170 dirty_flags.shaders = true;
171 }
172
173 // Vertex format
174 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
175 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
176 dirty_flags.vertex_attrib_format = true;
177 }
178
179 // Vertex buffer
180 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
181 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
182 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
183 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
184 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
185 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
186 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
187 method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
188 dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
189 } 320 }
190 } 321 }
191 322
@@ -214,7 +345,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
214 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): 345 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
215 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): 346 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
216 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { 347 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
217 ProcessCBData(method_call.argument); 348 StartCBData(method);
218 break; 349 break;
219 } 350 }
220 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { 351 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
@@ -249,6 +380,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
249 ProcessQueryGet(); 380 ProcessQueryGet();
250 break; 381 break;
251 } 382 }
383 case MAXWELL3D_REG_INDEX(condition.mode): {
384 ProcessQueryCondition();
385 break;
386 }
252 case MAXWELL3D_REG_INDEX(sync_info): { 387 case MAXWELL3D_REG_INDEX(sync_info): {
253 ProcessSyncPoint(); 388 ProcessSyncPoint();
254 break; 389 break;
@@ -261,7 +396,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
261 const bool is_last_call = method_call.IsLastCall(); 396 const bool is_last_call = method_call.IsLastCall();
262 upload_state.ProcessData(method_call.argument, is_last_call); 397 upload_state.ProcessData(method_call.argument, is_last_call);
263 if (is_last_call) { 398 if (is_last_call) {
264 dirty_flags.OnMemoryWrite(); 399 dirty.OnMemoryWrite();
265 } 400 }
266 break; 401 break;
267 } 402 }
@@ -281,7 +416,7 @@ void Maxwell3D::ProcessMacroUpload(u32 data) {
281} 416}
282 417
283void Maxwell3D::ProcessMacroBind(u32 data) { 418void Maxwell3D::ProcessMacroBind(u32 data) {
284 macro_offsets[regs.macros.entry] = data; 419 macro_positions[regs.macros.entry++] = data;
285} 420}
286 421
287void Maxwell3D::ProcessQueryGet() { 422void Maxwell3D::ProcessQueryGet() {
@@ -302,6 +437,7 @@ void Maxwell3D::ProcessQueryGet() {
302 result = regs.query.query_sequence; 437 result = regs.query.query_sequence;
303 break; 438 break;
304 default: 439 default:
440 result = 1;
305 UNIMPLEMENTED_MSG("Unimplemented query select type {}", 441 UNIMPLEMENTED_MSG("Unimplemented query select type {}",
306 static_cast<u32>(regs.query.query_get.select.Value())); 442 static_cast<u32>(regs.query.query_get.select.Value()));
307 } 443 }
@@ -333,7 +469,6 @@ void Maxwell3D::ProcessQueryGet() {
333 query_result.timestamp = system.CoreTiming().GetTicks(); 469 query_result.timestamp = system.CoreTiming().GetTicks();
334 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); 470 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
335 } 471 }
336 dirty_flags.OnMemoryWrite();
337 break; 472 break;
338 } 473 }
339 default: 474 default:
@@ -342,12 +477,52 @@ void Maxwell3D::ProcessQueryGet() {
342 } 477 }
343} 478}
344 479
480void Maxwell3D::ProcessQueryCondition() {
481 const GPUVAddr condition_address{regs.condition.Address()};
482 switch (regs.condition.mode) {
483 case Regs::ConditionMode::Always: {
484 execute_on = true;
485 break;
486 }
487 case Regs::ConditionMode::Never: {
488 execute_on = false;
489 break;
490 }
491 case Regs::ConditionMode::ResNonZero: {
492 Regs::QueryCompare cmp;
493 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
494 execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
495 break;
496 }
497 case Regs::ConditionMode::Equal: {
498 Regs::QueryCompare cmp;
499 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
500 execute_on =
501 cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
502 break;
503 }
504 case Regs::ConditionMode::NotEqual: {
505 Regs::QueryCompare cmp;
506 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
507 execute_on =
508 cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
509 break;
510 }
511 default: {
512 UNIMPLEMENTED_MSG("Uninplemented Condition Mode!");
513 execute_on = true;
514 break;
515 }
516 }
517}
518
345void Maxwell3D::ProcessSyncPoint() { 519void Maxwell3D::ProcessSyncPoint() {
346 const u32 sync_point = regs.sync_info.sync_point.Value(); 520 const u32 sync_point = regs.sync_info.sync_point.Value();
347 const u32 increment = regs.sync_info.increment.Value(); 521 const u32 increment = regs.sync_info.increment.Value();
348 const u32 cache_flush = regs.sync_info.unknown.Value(); 522 [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
349 LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment, 523 if (increment) {
350 cache_flush); 524 system.GPU().IncrementSyncPoint(sync_point);
525 }
351} 526}
352 527
353void Maxwell3D::DrawArrays() { 528void Maxwell3D::DrawArrays() {
@@ -405,23 +580,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
405} 580}
406 581
407void Maxwell3D::ProcessCBData(u32 value) { 582void Maxwell3D::ProcessCBData(u32 value) {
583 const u32 id = cb_data_state.id;
584 cb_data_state.buffer[id][cb_data_state.counter] = value;
585 // Increment the current buffer position.
586 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
587 cb_data_state.counter++;
588}
589
590void Maxwell3D::StartCBData(u32 method) {
591 constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
592 cb_data_state.start_pos = regs.const_buffer.cb_pos;
593 cb_data_state.id = method - first_cb_data;
594 cb_data_state.current = method;
595 cb_data_state.counter = 0;
596 ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
597}
598
599void Maxwell3D::FinishCBData() {
408 // Write the input value to the current const buffer at the current position. 600 // Write the input value to the current const buffer at the current position.
409 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); 601 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
410 ASSERT(buffer_address != 0); 602 ASSERT(buffer_address != 0);
411 603
412 // Don't allow writing past the end of the buffer. 604 // Don't allow writing past the end of the buffer.
413 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); 605 ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
414
415 const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
416 606
417 u8* ptr{memory_manager.GetPointer(address)}; 607 const GPUVAddr address{buffer_address + cb_data_state.start_pos};
418 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); 608 const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
419 memory_manager.Write<u32>(address, value);
420 609
421 dirty_flags.OnMemoryWrite(); 610 const u32 id = cb_data_state.id;
611 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
612 dirty.OnMemoryWrite();
422 613
423 // Increment the current buffer position. 614 cb_data_state.id = null_cb_data;
424 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; 615 cb_data_state.current = null_cb_data;
425} 616}
426 617
427Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 618Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
@@ -430,14 +621,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
430 Texture::TICEntry tic_entry; 621 Texture::TICEntry tic_entry;
431 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); 622 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
432 623
433 ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || 624 [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()};
434 tic_entry.header_version == Texture::TICHeaderVersion::Pitch, 625 [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()};
435 "TIC versions other than BlockLinear or Pitch are unimplemented"); 626 [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()};
436 627 [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()};
437 const auto r_type = tic_entry.r_type.Value();
438 const auto g_type = tic_entry.g_type.Value();
439 const auto b_type = tic_entry.b_type.Value();
440 const auto a_type = tic_entry.a_type.Value();
441 628
442 // TODO(Subv): Different data types for separate components are not supported 629 // TODO(Subv): Different data types for separate components are not supported
443 DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); 630 DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 13e314944..f67a5389f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -62,11 +62,13 @@ public:
62 static constexpr std::size_t NumVertexAttributes = 32; 62 static constexpr std::size_t NumVertexAttributes = 32;
63 static constexpr std::size_t NumVaryings = 31; 63 static constexpr std::size_t NumVaryings = 31;
64 static constexpr std::size_t NumTextureSamplers = 32; 64 static constexpr std::size_t NumTextureSamplers = 32;
65 static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
65 static constexpr std::size_t NumClipDistances = 8; 66 static constexpr std::size_t NumClipDistances = 8;
66 static constexpr std::size_t MaxShaderProgram = 6; 67 static constexpr std::size_t MaxShaderProgram = 6;
67 static constexpr std::size_t MaxShaderStage = 5; 68 static constexpr std::size_t MaxShaderStage = 5;
68 // Maximum number of const buffers per shader stage. 69 // Maximum number of const buffers per shader stage.
69 static constexpr std::size_t MaxConstBuffers = 18; 70 static constexpr std::size_t MaxConstBuffers = 18;
71 static constexpr std::size_t MaxConstBufferSize = 0x10000;
70 72
71 enum class QueryMode : u32 { 73 enum class QueryMode : u32 {
72 Write = 0, 74 Write = 0,
@@ -89,6 +91,20 @@ public:
89 91
90 enum class QuerySelect : u32 { 92 enum class QuerySelect : u32 {
91 Zero = 0, 93 Zero = 0,
94 TimeElapsed = 2,
95 TransformFeedbackPrimitivesGenerated = 11,
96 PrimitivesGenerated = 18,
97 SamplesPassed = 21,
98 TransformFeedbackUnknown = 26,
99 };
100
101 struct QueryCompare {
102 u32 initial_sequence;
103 u32 initial_mode;
104 u32 unknown1;
105 u32 unknown2;
106 u32 current_sequence;
107 u32 current_mode;
92 }; 108 };
93 109
94 enum class QuerySyncCondition : u32 { 110 enum class QuerySyncCondition : u32 {
@@ -96,6 +112,14 @@ public:
96 GreaterThan = 1, 112 GreaterThan = 1,
97 }; 113 };
98 114
115 enum class ConditionMode : u32 {
116 Never = 0,
117 Always = 1,
118 ResNonZero = 2,
119 Equal = 3,
120 NotEqual = 4,
121 };
122
99 enum class ShaderProgram : u32 { 123 enum class ShaderProgram : u32 {
100 VertexA = 0, 124 VertexA = 0,
101 VertexB = 1, 125 VertexB = 1,
@@ -814,7 +838,18 @@ public:
814 BitField<4, 1, u32> alpha_to_one; 838 BitField<4, 1, u32> alpha_to_one;
815 } multisample_control; 839 } multisample_control;
816 840
817 INSERT_PADDING_WORDS(0x7); 841 INSERT_PADDING_WORDS(0x4);
842
843 struct {
844 u32 address_high;
845 u32 address_low;
846 ConditionMode mode;
847
848 GPUVAddr Address() const {
849 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
850 address_low);
851 }
852 } condition;
818 853
819 struct { 854 struct {
820 u32 tsc_address_high; 855 u32 tsc_address_high;
@@ -1123,23 +1158,77 @@ public:
1123 1158
1124 State state{}; 1159 State state{};
1125 1160
1126 struct DirtyFlags { 1161 struct DirtyRegs {
1127 std::bitset<8> color_buffer{0xFF}; 1162 static constexpr std::size_t NUM_REGS = 256;
1128 std::bitset<32> vertex_array{0xFFFFFFFF}; 1163 union {
1164 struct {
1165 bool null_dirty;
1166
1167 // Vertex Attributes
1168 bool vertex_attrib_format;
1169
1170 // Vertex Arrays
1171 std::array<bool, 32> vertex_array;
1172
1173 bool vertex_array_buffers;
1174
1175 // Vertex Instances
1176 std::array<bool, 32> vertex_instance;
1129 1177
1130 bool vertex_attrib_format = true; 1178 bool vertex_instances;
1131 bool zeta_buffer = true; 1179
1132 bool shaders = true; 1180 // Render Targets
1181 std::array<bool, 8> render_target;
1182 bool depth_buffer;
1183
1184 bool render_settings;
1185
1186 // Shaders
1187 bool shaders;
1188
1189 // Rasterizer State
1190 bool viewport;
1191 bool clip_coefficient;
1192 bool cull_mode;
1193 bool primitive_restart;
1194 bool depth_test;
1195 bool stencil_test;
1196 bool blend_state;
1197 bool scissor_test;
1198 bool transform_feedback;
1199 bool color_mask;
1200 bool polygon_offset;
1201
1202 // Complementary
1203 bool viewport_transform;
1204 bool screen_y_control;
1205
1206 bool memory_general;
1207 };
1208 std::array<bool, NUM_REGS> regs;
1209 };
1210
1211 void ResetVertexArrays() {
1212 vertex_array.fill(true);
1213 vertex_array_buffers = true;
1214 }
1215
1216 void ResetRenderTargets() {
1217 depth_buffer = true;
1218 render_target.fill(true);
1219 render_settings = true;
1220 }
1133 1221
1134 void OnMemoryWrite() { 1222 void OnMemoryWrite() {
1135 zeta_buffer = true;
1136 shaders = true; 1223 shaders = true;
1137 color_buffer.set(); 1224 memory_general = true;
1138 vertex_array.set(); 1225 ResetRenderTargets();
1226 ResetVertexArrays();
1139 } 1227 }
1140 };
1141 1228
1142 DirtyFlags dirty_flags; 1229 } dirty{};
1230
1231 std::array<u8, Regs::NUM_REGS> dirty_pointers{};
1143 1232
1144 /// Reads a register value located at the input method address 1233 /// Reads a register value located at the input method address
1145 u32 GetRegisterValue(u32 method) const; 1234 u32 GetRegisterValue(u32 method) const;
@@ -1168,6 +1257,10 @@ public:
1168 return macro_memory; 1257 return macro_memory;
1169 } 1258 }
1170 1259
1260 bool ShouldExecute() const {
1261 return execute_on;
1262 }
1263
1171private: 1264private:
1172 void InitializeRegisterDefaults(); 1265 void InitializeRegisterDefaults();
1173 1266
@@ -1178,7 +1271,7 @@ private:
1178 MemoryManager& memory_manager; 1271 MemoryManager& memory_manager;
1179 1272
1180 /// Start offsets of each macro in macro_memory 1273 /// Start offsets of each macro in macro_memory
1181 std::unordered_map<u32, u32> macro_offsets; 1274 std::array<u32, 0x80> macro_positions = {};
1182 1275
1183 /// Memory for macro code 1276 /// Memory for macro code
1184 MacroMemory macro_memory; 1277 MacroMemory macro_memory;
@@ -1191,20 +1284,34 @@ private:
1191 /// Interpreter for the macro codes uploaded to the GPU. 1284 /// Interpreter for the macro codes uploaded to the GPU.
1192 MacroInterpreter macro_interpreter; 1285 MacroInterpreter macro_interpreter;
1193 1286
1287 static constexpr u32 null_cb_data = 0xFFFFFFFF;
1288 struct {
1289 std::array<std::array<u32, 0x4000>, 16> buffer;
1290 u32 current{null_cb_data};
1291 u32 id{null_cb_data};
1292 u32 start_pos{};
1293 u32 counter{};
1294 } cb_data_state;
1295
1194 Upload::State upload_state; 1296 Upload::State upload_state;
1195 1297
1298 bool execute_on{true};
1299
1196 /// Retrieves information about a specific TIC entry from the TIC buffer. 1300 /// Retrieves information about a specific TIC entry from the TIC buffer.
1197 Texture::TICEntry GetTICEntry(u32 tic_index) const; 1301 Texture::TICEntry GetTICEntry(u32 tic_index) const;
1198 1302
1199 /// Retrieves information about a specific TSC entry from the TSC buffer. 1303 /// Retrieves information about a specific TSC entry from the TSC buffer.
1200 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; 1304 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
1201 1305
1306 void InitDirtySettings();
1307
1202 /** 1308 /**
1203 * Call a macro on this engine. 1309 * Call a macro on this engine.
1204 * @param method Method to call 1310 * @param method Method to call
1311 * @param num_parameters Number of arguments
1205 * @param parameters Arguments to the method call 1312 * @param parameters Arguments to the method call
1206 */ 1313 */
1207 void CallMacroMethod(u32 method, std::vector<u32> parameters); 1314 void CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters);
1208 1315
1209 /// Handles writes to the macro uploading register. 1316 /// Handles writes to the macro uploading register.
1210 void ProcessMacroUpload(u32 data); 1317 void ProcessMacroUpload(u32 data);
@@ -1218,11 +1325,16 @@ private:
1218 /// Handles a write to the QUERY_GET register. 1325 /// Handles a write to the QUERY_GET register.
1219 void ProcessQueryGet(); 1326 void ProcessQueryGet();
1220 1327
1328 // Handles Conditional Rendering
1329 void ProcessQueryCondition();
1330
1221 /// Handles writes to syncing register. 1331 /// Handles writes to syncing register.
1222 void ProcessSyncPoint(); 1332 void ProcessSyncPoint();
1223 1333
1224 /// Handles a write to the CB_DATA[i] register. 1334 /// Handles a write to the CB_DATA[i] register.
1335 void StartCBData(u32 method);
1225 void ProcessCBData(u32 value); 1336 void ProcessCBData(u32 value);
1337 void FinishCBData();
1226 1338
1227 /// Handles a write to the CB_BIND register. 1339 /// Handles a write to the CB_BIND register.
1228 void ProcessCBBind(Regs::ShaderStage stage); 1340 void ProcessCBBind(Regs::ShaderStage stage);
@@ -1289,6 +1401,7 @@ ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
1289ASSERT_REG_POSITION(point_size, 0x546); 1401ASSERT_REG_POSITION(point_size, 0x546);
1290ASSERT_REG_POSITION(zeta_enable, 0x54E); 1402ASSERT_REG_POSITION(zeta_enable, 0x54E);
1291ASSERT_REG_POSITION(multisample_control, 0x54F); 1403ASSERT_REG_POSITION(multisample_control, 0x54F);
1404ASSERT_REG_POSITION(condition, 0x554);
1292ASSERT_REG_POSITION(tsc, 0x557); 1405ASSERT_REG_POSITION(tsc, 0x557);
1293ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); 1406ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
1294ASSERT_REG_POSITION(tic, 0x55D); 1407ASSERT_REG_POSITION(tic, 0x55D);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 3a5dfef0c..ad8453c5f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -5,18 +5,17 @@
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/settings.h"
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
9#include "video_core/engines/maxwell_dma.h" 10#include "video_core/engines/maxwell_dma.h"
10#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h" 12#include "video_core/renderer_base.h"
13#include "video_core/textures/decoders.h" 13#include "video_core/textures/decoders.h"
14 14
15namespace Tegra::Engines { 15namespace Tegra::Engines {
16 16
17MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 17MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
18 MemoryManager& memory_manager) 18 : system{system}, memory_manager{memory_manager} {}
19 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
20 19
21void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { 20void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
22 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 21 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -38,7 +37,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
38} 37}
39 38
40void MaxwellDMA::HandleCopy() { 39void MaxwellDMA::HandleCopy() {
41 LOG_WARNING(HW_GPU, "Requested a DMA copy"); 40 LOG_TRACE(HW_GPU, "Requested a DMA copy");
42 41
43 const GPUVAddr source = regs.src_address.Address(); 42 const GPUVAddr source = regs.src_address.Address();
44 const GPUVAddr dest = regs.dst_address.Address(); 43 const GPUVAddr dest = regs.dst_address.Address();
@@ -58,7 +57,7 @@ void MaxwellDMA::HandleCopy() {
58 } 57 }
59 58
60 // All copies here update the main memory, so mark all rasterizer states as invalid. 59 // All copies here update the main memory, so mark all rasterizer states as invalid.
61 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 60 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
62 61
63 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
64 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
@@ -84,13 +83,17 @@ void MaxwellDMA::HandleCopy() {
84 ASSERT(regs.exec.enable_2d == 1); 83 ASSERT(regs.exec.enable_2d == 1);
85 84
86 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 85 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
87 ASSERT(regs.src_params.size_z == 1); 86 ASSERT(regs.src_params.BlockDepth() == 0);
88 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 87 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
89 const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; 88 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
90 const std::size_t src_size = Texture::CalculateSize( 89 const std::size_t src_size = Texture::CalculateSize(
91 true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 90 true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
92 regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); 91 regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
93 92
93 const std::size_t src_layer_size = Texture::CalculateSize(
94 true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1,
95 regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
96
94 const std::size_t dst_size = regs.dst_pitch * regs.y_count; 97 const std::size_t dst_size = regs.dst_pitch * regs.y_count;
95 98
96 if (read_buffer.size() < src_size) { 99 if (read_buffer.size() < src_size) {
@@ -104,23 +107,23 @@ void MaxwellDMA::HandleCopy() {
104 memory_manager.ReadBlock(source, read_buffer.data(), src_size); 107 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
105 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); 108 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
106 109
107 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 110 Texture::UnswizzleSubrect(
108 regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), 111 regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
109 write_buffer.data(), regs.src_params.BlockHeight(), 112 read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(),
110 regs.src_params.pos_x, regs.src_params.pos_y); 113 regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y);
111 114
112 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); 115 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
113 } else { 116 } else {
114 ASSERT(regs.dst_params.BlockDepth() == 1); 117 ASSERT(regs.dst_params.BlockDepth() == 0);
115 118
116 const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; 119 const u32 bytes_per_pixel = regs.src_pitch / regs.x_count;
117 120
118 const std::size_t dst_size = Texture::CalculateSize( 121 const std::size_t dst_size = Texture::CalculateSize(
119 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 122 true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
120 regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); 123 regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
121 124
122 const std::size_t dst_layer_size = Texture::CalculateSize( 125 const std::size_t dst_layer_size = Texture::CalculateSize(
123 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, 126 true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
124 regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); 127 regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
125 128
126 const std::size_t src_size = regs.src_pitch * regs.y_count; 129 const std::size_t src_size = regs.src_pitch * regs.y_count;
@@ -133,14 +136,19 @@ void MaxwellDMA::HandleCopy() {
133 write_buffer.resize(dst_size); 136 write_buffer.resize(dst_size);
134 } 137 }
135 138
136 memory_manager.ReadBlock(source, read_buffer.data(), src_size); 139 if (Settings::values.use_accurate_gpu_emulation) {
137 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); 140 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
141 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
142 } else {
143 memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
144 memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
145 }
138 146
139 // If the input is linear and the output is tiled, swizzle the input and copy it over. 147 // If the input is linear and the output is tiled, swizzle the input and copy it over.
140 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 148 Texture::SwizzleSubrect(
141 src_bytes_per_pixel, 149 regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel,
142 write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, 150 write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(),
143 read_buffer.data(), regs.dst_params.BlockHeight()); 151 regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y);
144 152
145 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); 153 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
146 } 154 }
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index e5942f671..93808a9bb 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -20,10 +20,6 @@ namespace Tegra {
20class MemoryManager; 20class MemoryManager;
21} 21}
22 22
23namespace VideoCore {
24class RasterizerInterface;
25}
26
27namespace Tegra::Engines { 23namespace Tegra::Engines {
28 24
29/** 25/**
@@ -33,8 +29,7 @@ namespace Tegra::Engines {
33 29
34class MaxwellDMA final { 30class MaxwellDMA final {
35public: 31public:
36 explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 32 explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
37 MemoryManager& memory_manager);
38 ~MaxwellDMA() = default; 33 ~MaxwellDMA() = default;
39 34
40 /// Write the value to the register identified by method. 35 /// Write the value to the register identified by method.
@@ -59,11 +54,11 @@ public:
59 }; 54 };
60 55
61 u32 BlockHeight() const { 56 u32 BlockHeight() const {
62 return 1 << block_height; 57 return block_height.Value();
63 } 58 }
64 59
65 u32 BlockDepth() const { 60 u32 BlockDepth() const {
66 return 1 << block_depth; 61 return block_depth.Value();
67 } 62 }
68 }; 63 };
69 64
@@ -180,8 +175,6 @@ public:
180private: 175private:
181 Core::System& system; 176 Core::System& system;
182 177
183 VideoCore::RasterizerInterface& rasterizer;
184
185 MemoryManager& memory_manager; 178 MemoryManager& memory_manager;
186 179
187 std::vector<u8> read_buffer; 180 std::vector<u8> read_buffer;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index ffb3ec3e0..052e6d24e 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <bitset> 8#include <bitset>
8#include <optional> 9#include <optional>
9#include <tuple> 10#include <tuple>
@@ -77,7 +78,7 @@ union Attribute {
77 constexpr explicit Attribute(u64 value) : value(value) {} 78 constexpr explicit Attribute(u64 value) : value(value) {}
78 79
79 enum class Index : u64 { 80 enum class Index : u64 {
80 PointSize = 6, 81 LayerViewportPointSize = 6,
81 Position = 7, 82 Position = 7,
82 Attribute_0 = 8, 83 Attribute_0 = 8,
83 Attribute_31 = 39, 84 Attribute_31 = 39,
@@ -126,6 +127,15 @@ union Sampler {
126 u64 value{}; 127 u64 value{};
127}; 128};
128 129
130union Image {
131 Image() = default;
132
133 constexpr explicit Image(u64 value) : value{value} {}
134
135 BitField<36, 13, u64> index;
136 u64 value;
137};
138
129} // namespace Tegra::Shader 139} // namespace Tegra::Shader
130 140
131namespace std { 141namespace std {
@@ -344,6 +354,26 @@ enum class TextureMiscMode : u64 {
344 PTP, 354 PTP,
345}; 355};
346 356
357enum class SurfaceDataMode : u64 {
358 P = 0,
359 D_BA = 1,
360};
361
362enum class OutOfBoundsStore : u64 {
363 Ignore = 0,
364 Clamp = 1,
365 Trap = 2,
366};
367
368enum class ImageType : u64 {
369 Texture1D = 0,
370 TextureBuffer = 1,
371 Texture1DArray = 2,
372 Texture2D = 3,
373 Texture2DArray = 4,
374 Texture3D = 5,
375};
376
347enum class IsberdMode : u64 { 377enum class IsberdMode : u64 {
348 None = 0, 378 None = 0,
349 Patch = 1, 379 Patch = 1,
@@ -398,7 +428,7 @@ enum class LmemLoadCacheManagement : u64 {
398 CV = 3, 428 CV = 3,
399}; 429};
400 430
401enum class LmemStoreCacheManagement : u64 { 431enum class StoreCacheManagement : u64 {
402 Default = 0, 432 Default = 0,
403 CG = 1, 433 CG = 1,
404 CS = 2, 434 CS = 2,
@@ -508,6 +538,34 @@ enum class PhysicalAttributeDirection : u64 {
508 Output = 1, 538 Output = 1,
509}; 539};
510 540
541enum class VoteOperation : u64 {
542 All = 0, // allThreadsNV
543 Any = 1, // anyThreadNV
544 Eq = 2, // allThreadsEqualNV
545};
546
547enum class ImageAtomicSize : u64 {
548 U32 = 0,
549 S32 = 1,
550 U64 = 2,
551 F32 = 3,
552 S64 = 5,
553 SD32 = 6,
554 SD64 = 7,
555};
556
557enum class ImageAtomicOperation : u64 {
558 Add = 0,
559 Min = 1,
560 Max = 2,
561 Inc = 3,
562 Dec = 4,
563 And = 5,
564 Or = 6,
565 Xor = 7,
566 Exch = 8,
567};
568
511union Instruction { 569union Instruction {
512 Instruction& operator=(const Instruction& instr) { 570 Instruction& operator=(const Instruction& instr) {
513 value = instr.value; 571 value = instr.value;
@@ -530,6 +588,18 @@ union Instruction {
530 BitField<48, 16, u64> opcode; 588 BitField<48, 16, u64> opcode;
531 589
532 union { 590 union {
591 BitField<8, 5, ConditionCode> cc;
592 BitField<13, 1, u64> trigger;
593 } nop;
594
595 union {
596 BitField<48, 2, VoteOperation> operation;
597 BitField<45, 3, u64> dest_pred;
598 BitField<39, 3, u64> value;
599 BitField<42, 1, u64> negate_value;
600 } vote;
601
602 union {
533 BitField<8, 8, Register> gpr; 603 BitField<8, 8, Register> gpr;
534 BitField<20, 24, s64> offset; 604 BitField<20, 24, s64> offset;
535 } gmem; 605 } gmem;
@@ -627,6 +697,10 @@ union Instruction {
627 } shift; 697 } shift;
628 698
629 union { 699 union {
700 BitField<39, 1, u64> wrap;
701 } shr;
702
703 union {
630 BitField<39, 5, u64> shift_amount; 704 BitField<39, 5, u64> shift_amount;
631 BitField<48, 1, u64> negate_b; 705 BitField<48, 1, u64> negate_b;
632 BitField<49, 1, u64> negate_a; 706 BitField<49, 1, u64> negate_a;
@@ -811,7 +885,7 @@ union Instruction {
811 } ld_l; 885 } ld_l;
812 886
813 union { 887 union {
814 BitField<44, 2, LmemStoreCacheManagement> cache_management; 888 BitField<44, 2, StoreCacheManagement> cache_management;
815 } st_l; 889 } st_l;
816 890
817 union { 891 union {
@@ -838,6 +912,7 @@ union Instruction {
838 union { 912 union {
839 BitField<0, 3, u64> pred0; 913 BitField<0, 3, u64> pred0;
840 BitField<3, 3, u64> pred3; 914 BitField<3, 3, u64> pred3;
915 BitField<6, 1, u64> neg_b;
841 BitField<7, 1, u64> abs_a; 916 BitField<7, 1, u64> abs_a;
842 BitField<39, 3, u64> pred39; 917 BitField<39, 3, u64> pred39;
843 BitField<42, 1, u64> neg_pred; 918 BitField<42, 1, u64> neg_pred;
@@ -901,8 +976,6 @@ union Instruction {
901 } csetp; 976 } csetp;
902 977
903 union { 978 union {
904 BitField<35, 4, PredCondition> cond;
905 BitField<49, 1, u64> h_and;
906 BitField<6, 1, u64> ftz; 979 BitField<6, 1, u64> ftz;
907 BitField<45, 2, PredOperation> op; 980 BitField<45, 2, PredOperation> op;
908 BitField<3, 3, u64> pred3; 981 BitField<3, 3, u64> pred3;
@@ -910,9 +983,21 @@ union Instruction {
910 BitField<43, 1, u64> negate_a; 983 BitField<43, 1, u64> negate_a;
911 BitField<44, 1, u64> abs_a; 984 BitField<44, 1, u64> abs_a;
912 BitField<47, 2, HalfType> type_a; 985 BitField<47, 2, HalfType> type_a;
913 BitField<31, 1, u64> negate_b; 986 union {
914 BitField<30, 1, u64> abs_b; 987 BitField<35, 4, PredCondition> cond;
915 BitField<28, 2, HalfType> type_b; 988 BitField<49, 1, u64> h_and;
989 BitField<31, 1, u64> negate_b;
990 BitField<30, 1, u64> abs_b;
991 BitField<28, 2, HalfType> type_b;
992 } reg;
993 union {
994 BitField<56, 1, u64> negate_b;
995 BitField<54, 1, u64> abs_b;
996 } cbuf;
997 union {
998 BitField<49, 4, PredCondition> cond;
999 BitField<53, 1, u64> h_and;
1000 } cbuf_and_imm;
916 BitField<42, 1, u64> neg_pred; 1001 BitField<42, 1, u64> neg_pred;
917 BitField<39, 3, u64> pred39; 1002 BitField<39, 3, u64> pred39;
918 } hsetp2; 1003 } hsetp2;
@@ -961,7 +1046,6 @@ union Instruction {
961 } iset; 1046 } iset;
962 1047
963 union { 1048 union {
964 BitField<41, 2, u64> selector; // i2i and i2f only
965 BitField<45, 1, u64> negate_a; 1049 BitField<45, 1, u64> negate_a;
966 BitField<49, 1, u64> abs_a; 1050 BitField<49, 1, u64> abs_a;
967 BitField<10, 2, Register::Size> src_size; 1051 BitField<10, 2, Register::Size> src_size;
@@ -978,8 +1062,6 @@ union Instruction {
978 } f2i; 1062 } f2i;
979 1063
980 union { 1064 union {
981 BitField<8, 2, Register::Size> src_size;
982 BitField<10, 2, Register::Size> dst_size;
983 BitField<39, 4, u64> rounding; 1065 BitField<39, 4, u64> rounding;
984 // H0, H1 extract for F16 missing 1066 // H0, H1 extract for F16 missing
985 BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value 1067 BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
@@ -989,6 +1071,13 @@ union Instruction {
989 } 1071 }
990 } f2f; 1072 } f2f;
991 1073
1074 union {
1075 BitField<41, 2, u64> selector;
1076 } int_src;
1077
1078 union {
1079 BitField<41, 1, u64> selector;
1080 } float_src;
992 } conversion; 1081 } conversion;
993 1082
994 union { 1083 union {
@@ -1232,8 +1321,23 @@ union Instruction {
1232 } texs; 1321 } texs;
1233 1322
1234 union { 1323 union {
1324 BitField<28, 1, u64> is_array;
1325 BitField<29, 2, TextureType> texture_type;
1326 BitField<35, 1, u64> aoffi;
1327 BitField<49, 1, u64> nodep_flag;
1328 BitField<50, 1, u64> ms; // Multisample?
1329 BitField<54, 1, u64> cl;
1330 BitField<55, 1, u64> process_mode;
1331
1332 TextureProcessMode GetTextureProcessMode() const {
1333 return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
1334 }
1335 } tld;
1336
1337 union {
1235 BitField<49, 1, u64> nodep_flag; 1338 BitField<49, 1, u64> nodep_flag;
1236 BitField<53, 4, u64> texture_info; 1339 BitField<53, 4, u64> texture_info;
1340 BitField<59, 1, u64> fp32_flag;
1237 1341
1238 TextureType GetTextureType() const { 1342 TextureType GetTextureType() const {
1239 // The TLDS instruction has a weird encoding for the texture type. 1343 // The TLDS instruction has a weird encoding for the texture type.
@@ -1281,6 +1385,43 @@ union Instruction {
1281 } tlds; 1385 } tlds;
1282 1386
1283 union { 1387 union {
1388 BitField<24, 2, StoreCacheManagement> cache_management;
1389 BitField<33, 3, ImageType> image_type;
1390 BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
1391 BitField<51, 1, u64> is_immediate;
1392 BitField<52, 1, SurfaceDataMode> mode;
1393
1394 BitField<20, 3, StoreType> store_data_layout;
1395 BitField<20, 4, u64> component_mask_selector;
1396
1397 bool IsComponentEnabled(std::size_t component) const {
1398 ASSERT(mode == SurfaceDataMode::P);
1399 constexpr u8 R = 0b0001;
1400 constexpr u8 G = 0b0010;
1401 constexpr u8 B = 0b0100;
1402 constexpr u8 A = 0b1000;
1403 constexpr std::array<u8, 16> mask = {
1404 0, (R), (G), (R | G), (B), (R | B),
1405 (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A),
1406 (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
1407 return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
1408 }
1409
1410 StoreType GetStoreDataLayout() const {
1411 ASSERT(mode == SurfaceDataMode::D_BA);
1412 return store_data_layout;
1413 }
1414 } sust;
1415
1416 union {
1417 BitField<28, 1, u64> is_ba;
1418 BitField<51, 3, ImageAtomicSize> size;
1419 BitField<33, 3, ImageType> image_type;
1420 BitField<29, 4, ImageAtomicOperation> operation;
1421 BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
1422 } suatom_d;
1423
1424 union {
1284 BitField<20, 24, u64> target; 1425 BitField<20, 24, u64> target;
1285 BitField<5, 1, u64> constant_buffer; 1426 BitField<5, 1, u64> constant_buffer;
1286 1427
@@ -1295,6 +1436,20 @@ union Instruction {
1295 } bra; 1436 } bra;
1296 1437
1297 union { 1438 union {
1439 BitField<20, 24, u64> target;
1440 BitField<5, 1, u64> constant_buffer;
1441
1442 s32 GetBranchExtend() const {
1443 // Sign extend the branch target offset
1444 u32 mask = 1U << (24 - 1);
1445 u32 value = static_cast<u32>(target);
1446 // The branch offset is relative to the next instruction and is stored in bytes, so
1447 // divide it by the size of an instruction and add 1 to it.
1448 return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1;
1449 }
1450 } brx;
1451
1452 union {
1298 BitField<39, 1, u64> emit; // EmitVertex 1453 BitField<39, 1, u64> emit; // EmitVertex
1299 BitField<40, 1, u64> cut; // EndPrimitive 1454 BitField<40, 1, u64> cut; // EndPrimitive
1300 } out; 1455 } out;
@@ -1371,6 +1526,7 @@ union Instruction {
1371 1526
1372 Attribute attribute; 1527 Attribute attribute;
1373 Sampler sampler; 1528 Sampler sampler;
1529 Image image;
1374 1530
1375 u64 value; 1531 u64 value;
1376}; 1532};
@@ -1385,11 +1541,13 @@ public:
1385 SYNC, 1541 SYNC,
1386 BRK, 1542 BRK,
1387 DEPBAR, 1543 DEPBAR,
1544 VOTE,
1388 BFE_C, 1545 BFE_C,
1389 BFE_R, 1546 BFE_R,
1390 BFE_IMM, 1547 BFE_IMM,
1391 BFI_IMM_R, 1548 BFI_IMM_R,
1392 BRA, 1549 BRA,
1550 BRX,
1393 PBK, 1551 PBK,
1394 LD_A, 1552 LD_A,
1395 LD_L, 1553 LD_L,
@@ -1408,12 +1566,16 @@ public:
1408 TXQ, // Texture Query 1566 TXQ, // Texture Query
1409 TXQ_B, // Texture Query Bindless 1567 TXQ_B, // Texture Query Bindless
1410 TEXS, // Texture Fetch with scalar/non-vec4 source/destinations 1568 TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
1569 TLD, // Texture Load
1411 TLDS, // Texture Load with scalar/non-vec4 source/destinations 1570 TLDS, // Texture Load with scalar/non-vec4 source/destinations
1412 TLD4, // Texture Load 4 1571 TLD4, // Texture Load 4
1413 TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations 1572 TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
1414 TMML_B, // Texture Mip Map Level 1573 TMML_B, // Texture Mip Map Level
1415 TMML, // Texture Mip Map Level 1574 TMML, // Texture Mip Map Level
1575 SUST, // Surface Store
1576 SUATOM, // Surface Atomic Operation
1416 EXIT, 1577 EXIT,
1578 NOP,
1417 IPA, 1579 IPA,
1418 OUT_R, // Emit vertex/primitive 1580 OUT_R, // Emit vertex/primitive
1419 ISBERD, 1581 ISBERD,
@@ -1456,7 +1618,9 @@ public:
1456 HFMA2_RC, 1618 HFMA2_RC,
1457 HFMA2_RR, 1619 HFMA2_RR,
1458 HFMA2_IMM_R, 1620 HFMA2_IMM_R,
1621 HSETP2_C,
1459 HSETP2_R, 1622 HSETP2_R,
1623 HSETP2_IMM,
1460 HSET2_R, 1624 HSET2_R,
1461 POPC_C, 1625 POPC_C,
1462 POPC_R, 1626 POPC_R,
@@ -1541,8 +1705,10 @@ public:
1541 Hfma2, 1705 Hfma2,
1542 Flow, 1706 Flow,
1543 Synch, 1707 Synch,
1708 Warp,
1544 Memory, 1709 Memory,
1545 Texture, 1710 Texture,
1711 Image,
1546 FloatSet, 1712 FloatSet,
1547 FloatSetPredicate, 1713 FloatSetPredicate,
1548 IntegerSet, 1714 IntegerSet,
@@ -1661,10 +1827,12 @@ private:
1661 INST("111000101001----", Id::SSY, Type::Flow, "SSY"), 1827 INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
1662 INST("111000101010----", Id::PBK, Type::Flow, "PBK"), 1828 INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
1663 INST("111000100100----", Id::BRA, Type::Flow, "BRA"), 1829 INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
1830 INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
1664 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), 1831 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
1665 INST("111000110100---", Id::BRK, Type::Flow, "BRK"), 1832 INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
1666 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), 1833 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
1667 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 1834 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
1835 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
1668 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 1836 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
1669 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), 1837 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
1670 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), 1838 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
@@ -1682,11 +1850,15 @@ private:
1682 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), 1850 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
1683 INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), 1851 INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
1684 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), 1852 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1685 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), 1853 INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
1854 INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
1686 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), 1855 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1687 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), 1856 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1688 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), 1857 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
1689 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), 1858 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
1859 INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
1860 INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
1861 INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
1690 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1862 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1691 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1863 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
1692 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), 1864 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
@@ -1735,7 +1907,9 @@ private:
1735 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), 1907 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
1736 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), 1908 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
1737 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), 1909 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
1738 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), 1910 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
1911 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
1912 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
1739 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), 1913 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
1740 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), 1914 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
1741 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), 1915 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 52706505b..2c47541cb 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -17,26 +17,15 @@
17 17
18namespace Tegra { 18namespace Tegra {
19 19
20u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { 20GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
21 switch (format) { 21 : system{system}, renderer{renderer}, is_async{is_async} {
22 case PixelFormat::ABGR8:
23 case PixelFormat::BGRA8:
24 return 4;
25 default:
26 return 4;
27 }
28
29 UNREACHABLE();
30}
31
32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
33 auto& rasterizer{renderer.Rasterizer()}; 22 auto& rasterizer{renderer.Rasterizer()};
34 memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer); 23 memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 24 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 25 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 26 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
38 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); 27 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
39 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); 28 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
40 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); 29 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
41} 30}
42 31
@@ -50,6 +39,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const {
50 return *maxwell_3d; 39 return *maxwell_3d;
51} 40}
52 41
42Engines::KeplerCompute& GPU::KeplerCompute() {
43 return *kepler_compute;
44}
45
46const Engines::KeplerCompute& GPU::KeplerCompute() const {
47 return *kepler_compute;
48}
49
53MemoryManager& GPU::MemoryManager() { 50MemoryManager& GPU::MemoryManager() {
54 return *memory_manager; 51 return *memory_manager;
55} 52}
@@ -66,6 +63,55 @@ const DmaPusher& GPU::DmaPusher() const {
66 return *dma_pusher; 63 return *dma_pusher;
67} 64}
68 65
66void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
67 syncpoints[syncpoint_id]++;
68 std::lock_guard lock{sync_mutex};
69 if (!syncpt_interrupts[syncpoint_id].empty()) {
70 u32 value = syncpoints[syncpoint_id].load();
71 auto it = syncpt_interrupts[syncpoint_id].begin();
72 while (it != syncpt_interrupts[syncpoint_id].end()) {
73 if (value >= *it) {
74 TriggerCpuInterrupt(syncpoint_id, *it);
75 it = syncpt_interrupts[syncpoint_id].erase(it);
76 continue;
77 }
78 it++;
79 }
80 }
81}
82
83u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const {
84 return syncpoints[syncpoint_id].load();
85}
86
87void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
88 auto& interrupt = syncpt_interrupts[syncpoint_id];
89 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
90 [value](u32 in_value) { return in_value == value; });
91 if (contains) {
92 return;
93 }
94 syncpt_interrupts[syncpoint_id].emplace_back(value);
95}
96
97bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
98 std::lock_guard lock{sync_mutex};
99 auto& interrupt = syncpt_interrupts[syncpoint_id];
100 const auto iter =
101 std::find_if(interrupt.begin(), interrupt.end(),
102 [value](u32 interrupt_value) { return value == interrupt_value; });
103
104 if (iter == interrupt.end()) {
105 return false;
106 }
107 interrupt.erase(iter);
108 return true;
109}
110
111void GPU::FlushCommands() {
112 renderer.Rasterizer().FlushCommands();
113}
114
69u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { 115u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
70 ASSERT(format != RenderTargetFormat::NONE); 116 ASSERT(format != RenderTargetFormat::NONE);
71 117
@@ -143,12 +189,12 @@ enum class BufferMethods {
143 NotifyIntr = 0x8, 189 NotifyIntr = 0x8,
144 WrcacheFlush = 0x9, 190 WrcacheFlush = 0x9,
145 Unk28 = 0xA, 191 Unk28 = 0xA,
146 Unk2c = 0xB, 192 UnkCacheFlush = 0xB,
147 RefCnt = 0x14, 193 RefCnt = 0x14,
148 SemaphoreAcquire = 0x1A, 194 SemaphoreAcquire = 0x1A,
149 SemaphoreRelease = 0x1B, 195 SemaphoreRelease = 0x1B,
150 Unk70 = 0x1C, 196 FenceValue = 0x1C,
151 Unk74 = 0x1D, 197 FenceAction = 0x1D,
152 Unk78 = 0x1E, 198 Unk78 = 0x1E,
153 Unk7c = 0x1F, 199 Unk7c = 0x1F,
154 Yield = 0x20, 200 Yield = 0x20,
@@ -194,6 +240,10 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
194 case BufferMethods::SemaphoreAddressLow: 240 case BufferMethods::SemaphoreAddressLow:
195 case BufferMethods::SemaphoreSequence: 241 case BufferMethods::SemaphoreSequence:
196 case BufferMethods::RefCnt: 242 case BufferMethods::RefCnt:
243 case BufferMethods::UnkCacheFlush:
244 case BufferMethods::WrcacheFlush:
245 case BufferMethods::FenceValue:
246 case BufferMethods::FenceAction:
197 break; 247 break;
198 case BufferMethods::SemaphoreTrigger: { 248 case BufferMethods::SemaphoreTrigger: {
199 ProcessSemaphoreTriggerMethod(); 249 ProcessSemaphoreTriggerMethod();
@@ -204,21 +254,11 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
204 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); 254 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
205 break; 255 break;
206 } 256 }
207 case BufferMethods::WrcacheFlush: {
208 // TODO(Kmather73): Research and implement this method.
209 LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
210 break;
211 }
212 case BufferMethods::Unk28: { 257 case BufferMethods::Unk28: {
213 // TODO(Kmather73): Research and implement this method. 258 // TODO(Kmather73): Research and implement this method.
214 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); 259 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
215 break; 260 break;
216 } 261 }
217 case BufferMethods::Unk2c: {
218 // TODO(Kmather73): Research and implement this method.
219 LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
220 break;
221 }
222 case BufferMethods::SemaphoreAcquire: { 262 case BufferMethods::SemaphoreAcquire: {
223 ProcessSemaphoreAcquire(); 263 ProcessSemaphoreAcquire();
224 break; 264 break;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fe6628923..78bc0601a 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -5,8 +5,12 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic>
9#include <list>
8#include <memory> 10#include <memory>
11#include <mutex>
9#include "common/common_types.h" 12#include "common/common_types.h"
13#include "core/hle/service/nvdrv/nvdata.h"
10#include "core/hle/service/nvflinger/buffer_queue.h" 14#include "core/hle/service/nvflinger/buffer_queue.h"
11#include "video_core/dma_pusher.h" 15#include "video_core/dma_pusher.h"
12 16
@@ -15,6 +19,10 @@ inline CacheAddr ToCacheAddr(const void* host_ptr) {
15 return reinterpret_cast<CacheAddr>(host_ptr); 19 return reinterpret_cast<CacheAddr>(host_ptr);
16} 20}
17 21
22inline u8* FromCacheAddr(CacheAddr cache_addr) {
23 return reinterpret_cast<u8*>(cache_addr);
24}
25
18namespace Core { 26namespace Core {
19class System; 27class System;
20} 28}
@@ -87,14 +95,10 @@ class DebugContext;
87struct FramebufferConfig { 95struct FramebufferConfig {
88 enum class PixelFormat : u32 { 96 enum class PixelFormat : u32 {
89 ABGR8 = 1, 97 ABGR8 = 1,
98 RGB565 = 4,
90 BGRA8 = 5, 99 BGRA8 = 5,
91 }; 100 };
92 101
93 /**
94 * Returns the number of bytes per pixel.
95 */
96 static u32 BytesPerPixel(PixelFormat format);
97
98 VAddr address; 102 VAddr address;
99 u32 offset; 103 u32 offset;
100 u32 width; 104 u32 width;
@@ -127,7 +131,7 @@ class MemoryManager;
127 131
128class GPU { 132class GPU {
129public: 133public:
130 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); 134 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async);
131 135
132 virtual ~GPU(); 136 virtual ~GPU();
133 137
@@ -149,12 +153,20 @@ public:
149 /// Calls a GPU method. 153 /// Calls a GPU method.
150 void CallMethod(const MethodCall& method_call); 154 void CallMethod(const MethodCall& method_call);
151 155
156 void FlushCommands();
157
152 /// Returns a reference to the Maxwell3D GPU engine. 158 /// Returns a reference to the Maxwell3D GPU engine.
153 Engines::Maxwell3D& Maxwell3D(); 159 Engines::Maxwell3D& Maxwell3D();
154 160
155 /// Returns a const reference to the Maxwell3D GPU engine. 161 /// Returns a const reference to the Maxwell3D GPU engine.
156 const Engines::Maxwell3D& Maxwell3D() const; 162 const Engines::Maxwell3D& Maxwell3D() const;
157 163
164 /// Returns a reference to the KeplerCompute GPU engine.
165 Engines::KeplerCompute& KeplerCompute();
166
167 /// Returns a reference to the KeplerCompute GPU engine.
168 const Engines::KeplerCompute& KeplerCompute() const;
169
158 /// Returns a reference to the GPU memory manager. 170 /// Returns a reference to the GPU memory manager.
159 Tegra::MemoryManager& MemoryManager(); 171 Tegra::MemoryManager& MemoryManager();
160 172
@@ -164,6 +176,22 @@ public:
164 /// Returns a reference to the GPU DMA pusher. 176 /// Returns a reference to the GPU DMA pusher.
165 Tegra::DmaPusher& DmaPusher(); 177 Tegra::DmaPusher& DmaPusher();
166 178
179 void IncrementSyncPoint(u32 syncpoint_id);
180
181 u32 GetSyncpointValue(u32 syncpoint_id) const;
182
183 void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
184
185 bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
186
187 std::unique_lock<std::mutex> LockSync() {
188 return std::unique_lock{sync_mutex};
189 }
190
191 bool IsAsync() const {
192 return is_async;
193 }
194
167 /// Returns a const reference to the GPU DMA pusher. 195 /// Returns a const reference to the GPU DMA pusher.
168 const Tegra::DmaPusher& DmaPusher() const; 196 const Tegra::DmaPusher& DmaPusher() const;
169 197
@@ -194,7 +222,12 @@ public:
194 222
195 u32 semaphore_acquire; 223 u32 semaphore_acquire;
196 u32 semaphore_release; 224 u32 semaphore_release;
197 INSERT_PADDING_WORDS(0xE4); 225 u32 fence_value;
226 union {
227 BitField<4, 4, u32> operation;
228 BitField<8, 8, u32> id;
229 } fence_action;
230 INSERT_PADDING_WORDS(0xE2);
198 231
199 // Puller state 232 // Puller state
200 u32 acquire_mode; 233 u32 acquire_mode;
@@ -216,8 +249,7 @@ public:
216 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; 249 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
217 250
218 /// Swap buffers (render frame) 251 /// Swap buffers (render frame)
219 virtual void SwapBuffers( 252 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
220 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
221 253
222 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 254 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
223 virtual void FlushRegion(CacheAddr addr, u64 size) = 0; 255 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
@@ -228,6 +260,9 @@ public:
228 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 260 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
229 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 261 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
230 262
263protected:
264 virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
265
231private: 266private:
232 void ProcessBindMethod(const MethodCall& method_call); 267 void ProcessBindMethod(const MethodCall& method_call);
233 void ProcessSemaphoreTriggerMethod(); 268 void ProcessSemaphoreTriggerMethod();
@@ -245,6 +280,7 @@ private:
245 280
246protected: 281protected:
247 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 282 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
283 Core::System& system;
248 VideoCore::RendererBase& renderer; 284 VideoCore::RendererBase& renderer;
249 285
250private: 286private:
@@ -262,6 +298,14 @@ private:
262 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; 298 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
263 /// Inline memory engine 299 /// Inline memory engine
264 std::unique_ptr<Engines::KeplerMemory> kepler_memory; 300 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
301
302 std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
303
304 std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
305
306 std::mutex sync_mutex;
307
308 const bool is_async;
265}; 309};
266 310
267#define ASSERT_REG_POSITION(field_name, position) \ 311#define ASSERT_REG_POSITION(field_name, position) \
@@ -274,6 +318,8 @@ ASSERT_REG_POSITION(semaphore_trigger, 0x7);
274ASSERT_REG_POSITION(reference_count, 0x14); 318ASSERT_REG_POSITION(reference_count, 0x14);
275ASSERT_REG_POSITION(semaphore_acquire, 0x1A); 319ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
276ASSERT_REG_POSITION(semaphore_release, 0x1B); 320ASSERT_REG_POSITION(semaphore_release, 0x1B);
321ASSERT_REG_POSITION(fence_value, 0x1C);
322ASSERT_REG_POSITION(fence_action, 0x1D);
277 323
278ASSERT_REG_POSITION(acquire_mode, 0x100); 324ASSERT_REG_POSITION(acquire_mode, 0x100);
279ASSERT_REG_POSITION(acquire_source, 0x101); 325ASSERT_REG_POSITION(acquire_source, 0x101);
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index d4e2553a9..f2a3a390e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
6#include "core/hardware_interrupt_manager.h"
5#include "video_core/gpu_asynch.h" 7#include "video_core/gpu_asynch.h"
6#include "video_core/gpu_thread.h" 8#include "video_core/gpu_thread.h"
7#include "video_core/renderer_base.h" 9#include "video_core/renderer_base.h"
@@ -9,7 +11,7 @@
9namespace VideoCommon { 11namespace VideoCommon {
10 12
11GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) 13GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
12 : GPU(system, renderer), gpu_thread{system} {} 14 : GPU(system, renderer, true), gpu_thread{system} {}
13 15
14GPUAsynch::~GPUAsynch() = default; 16GPUAsynch::~GPUAsynch() = default;
15 17
@@ -21,9 +23,8 @@ void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
21 gpu_thread.SubmitList(std::move(entries)); 23 gpu_thread.SubmitList(std::move(entries));
22} 24}
23 25
24void GPUAsynch::SwapBuffers( 26void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
25 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 27 gpu_thread.SwapBuffers(framebuffer);
26 gpu_thread.SwapBuffers(std::move(framebuffer));
27} 28}
28 29
29void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { 30void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
@@ -38,4 +39,9 @@ void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
38 gpu_thread.FlushAndInvalidateRegion(addr, size); 39 gpu_thread.FlushAndInvalidateRegion(addr, size);
39} 40}
40 41
42void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
43 auto& interrupt_manager = system.InterruptManager();
44 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
45}
46
41} // namespace VideoCommon 47} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 30be74cba..a12f9bac4 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -14,19 +14,21 @@ class RendererBase;
14namespace VideoCommon { 14namespace VideoCommon {
15 15
16/// Implementation of GPU interface that runs the GPU asynchronously 16/// Implementation of GPU interface that runs the GPU asynchronously
17class GPUAsynch : public Tegra::GPU { 17class GPUAsynch final : public Tegra::GPU {
18public: 18public:
19 explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); 19 explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
20 ~GPUAsynch() override; 20 ~GPUAsynch() override;
21 21
22 void Start() override; 22 void Start() override;
23 void PushGPUEntries(Tegra::CommandList&& entries) override; 23 void PushGPUEntries(Tegra::CommandList&& entries) override;
24 void SwapBuffers( 24 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
25 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
26 void FlushRegion(CacheAddr addr, u64 size) override; 25 void FlushRegion(CacheAddr addr, u64 size) override;
27 void InvalidateRegion(CacheAddr addr, u64 size) override; 26 void InvalidateRegion(CacheAddr addr, u64 size) override;
28 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 27 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
29 28
29protected:
30 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
31
30private: 32private:
31 GPUThread::ThreadManager gpu_thread; 33 GPUThread::ThreadManager gpu_thread;
32}; 34};
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 45e43b1dc..d48221077 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -8,7 +8,7 @@
8namespace VideoCommon { 8namespace VideoCommon {
9 9
10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) 10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
11 : GPU(system, renderer) {} 11 : GPU(system, renderer, false) {}
12 12
13GPUSynch::~GPUSynch() = default; 13GPUSynch::~GPUSynch() = default;
14 14
@@ -19,9 +19,8 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
19 dma_pusher->DispatchCalls(); 19 dma_pusher->DispatchCalls();
20} 20}
21 21
22void GPUSynch::SwapBuffers( 22void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
23 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 23 renderer.SwapBuffers(framebuffer);
24 renderer.SwapBuffers(std::move(framebuffer));
25} 24}
26 25
27void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { 26void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 3031fcf72..5eb1c461c 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -13,18 +13,21 @@ class RendererBase;
13namespace VideoCommon { 13namespace VideoCommon {
14 14
15/// Implementation of GPU interface that runs the GPU synchronously 15/// Implementation of GPU interface that runs the GPU synchronously
16class GPUSynch : public Tegra::GPU { 16class GPUSynch final : public Tegra::GPU {
17public: 17public:
18 explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); 18 explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
19 ~GPUSynch() override; 19 ~GPUSynch() override;
20 20
21 void Start() override; 21 void Start() override;
22 void PushGPUEntries(Tegra::CommandList&& entries) override; 22 void PushGPUEntries(Tegra::CommandList&& entries) override;
23 void SwapBuffers( 23 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
24 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
25 void FlushRegion(CacheAddr addr, u64 size) override; 24 void FlushRegion(CacheAddr addr, u64 size) override;
26 void InvalidateRegion(CacheAddr addr, u64 size) override; 25 void InvalidateRegion(CacheAddr addr, u64 size) override;
27 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 26 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
27
28protected:
29 void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
30 [[maybe_unused]] u32 value) const override {}
28}; 31};
29 32
30} // namespace VideoCommon 33} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 3f0939ec9..5f039e4fd 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -21,7 +21,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
21 MicroProfileOnThreadCreate("GpuThread"); 21 MicroProfileOnThreadCreate("GpuThread");
22 22
23 // Wait for first GPU command before acquiring the window context 23 // Wait for first GPU command before acquiring the window context
24 state.WaitForCommands(); 24 while (state.queue.Empty())
25 ;
25 26
26 // If emulation was stopped during disk shader loading, abort before trying to acquire context 27 // If emulation was stopped during disk shader loading, abort before trying to acquire context
27 if (!state.is_running) { 28 if (!state.is_running) {
@@ -32,14 +33,13 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
32 33
33 CommandDataContainer next; 34 CommandDataContainer next;
34 while (state.is_running) { 35 while (state.is_running) {
35 state.WaitForCommands();
36 while (!state.queue.Empty()) { 36 while (!state.queue.Empty()) {
37 state.queue.Pop(next); 37 state.queue.Pop(next);
38 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { 38 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
39 dma_pusher.Push(std::move(submit_list->entries)); 39 dma_pusher.Push(std::move(submit_list->entries));
40 dma_pusher.DispatchCalls(); 40 dma_pusher.DispatchCalls();
41 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { 41 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
42 renderer.SwapBuffers(std::move(data->framebuffer)); 42 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
43 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { 43 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
44 renderer.Rasterizer().FlushRegion(data->addr, data->size); 44 renderer.Rasterizer().FlushRegion(data->addr, data->size);
45 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { 45 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -49,8 +49,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
49 } else { 49 } else {
50 UNREACHABLE(); 50 UNREACHABLE();
51 } 51 }
52 state.signaled_fence = next.fence; 52 state.signaled_fence.store(next.fence);
53 state.TrySynchronize();
54 } 53 }
55 } 54 }
56} 55}
@@ -79,9 +78,9 @@ void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
79 system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); 78 system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
80} 79}
81 80
82void ThreadManager::SwapBuffers( 81void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
83 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 82 PushCommand(SwapBuffersCommand(framebuffer ? *framebuffer
84 PushCommand(SwapBuffersCommand(std::move(framebuffer))); 83 : std::optional<const Tegra::FramebufferConfig>{}));
85} 84}
86 85
87void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { 86void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
@@ -89,12 +88,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
89} 88}
90 89
91void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { 90void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
92 if (state.queue.Empty()) { 91 system.Renderer().Rasterizer().InvalidateRegion(addr, size);
93 // It's quicker to invalidate a single region on the CPU if the queue is already empty
94 system.Renderer().Rasterizer().InvalidateRegion(addr, size);
95 } else {
96 PushCommand(InvalidateRegionCommand(addr, size));
97 }
98} 92}
99 93
100void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 94void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
@@ -105,22 +99,13 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
105u64 ThreadManager::PushCommand(CommandData&& command_data) { 99u64 ThreadManager::PushCommand(CommandData&& command_data) {
106 const u64 fence{++state.last_fence}; 100 const u64 fence{++state.last_fence};
107 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 101 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
108 state.SignalCommands();
109 return fence; 102 return fence;
110} 103}
111 104
112MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); 105MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
113void SynchState::WaitForSynchronization(u64 fence) { 106void SynchState::WaitForSynchronization(u64 fence) {
114 if (signaled_fence >= fence) { 107 while (signaled_fence.load() < fence)
115 return; 108 ;
116 }
117
118 // Wait for the GPU to be idle (all commands to be executed)
119 {
120 MICROPROFILE_SCOPE(GPU_wait);
121 std::unique_lock lock{synchronization_mutex};
122 synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
123 }
124} 109}
125 110
126} // namespace VideoCommon::GPUThread 111} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 05a168a72..3ae0ec9f3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -88,41 +88,9 @@ struct CommandDataContainer {
88/// Struct used to synchronize the GPU thread 88/// Struct used to synchronize the GPU thread
89struct SynchState final { 89struct SynchState final {
90 std::atomic_bool is_running{true}; 90 std::atomic_bool is_running{true};
91 std::atomic_int queued_frame_count{};
92 std::mutex synchronization_mutex;
93 std::mutex commands_mutex;
94 std::condition_variable commands_condition;
95 std::condition_variable synchronization_condition;
96
97 /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
98 /// synchronized. This is entirely empirical.
99 bool IsSynchronized() const {
100 constexpr std::size_t max_queue_gap{5};
101 return queue.Size() <= max_queue_gap;
102 }
103
104 void TrySynchronize() {
105 if (IsSynchronized()) {
106 std::lock_guard lock{synchronization_mutex};
107 synchronization_condition.notify_one();
108 }
109 }
110 91
111 void WaitForSynchronization(u64 fence); 92 void WaitForSynchronization(u64 fence);
112 93
113 void SignalCommands() {
114 if (queue.Empty()) {
115 return;
116 }
117
118 commands_condition.notify_one();
119 }
120
121 void WaitForCommands() {
122 std::unique_lock lock{commands_mutex};
123 commands_condition.wait(lock, [this] { return !queue.Empty(); });
124 }
125
126 using CommandQueue = Common::SPSCQueue<CommandDataContainer>; 94 using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
127 CommandQueue queue; 95 CommandQueue queue;
128 u64 last_fence{}; 96 u64 last_fence{};
@@ -142,8 +110,7 @@ public:
142 void SubmitList(Tegra::CommandList&& entries); 110 void SubmitList(Tegra::CommandList&& entries);
143 111
144 /// Swap buffers (render frame) 112 /// Swap buffers (render frame)
145 void SwapBuffers( 113 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
146 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
147 114
148 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 115 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
149 void FlushRegion(CacheAddr addr, u64 size); 116 void FlushRegion(CacheAddr addr, u64 size);
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index c766ed692..4e1cb98db 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -4,17 +4,28 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "common/microprofile.h"
7#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro_interpreter.h" 9#include "video_core/macro_interpreter.h"
9 10
11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
12
10namespace Tegra { 13namespace Tegra {
11 14
12MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} 15MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
13 16
14void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { 17void MacroInterpreter::Execute(u32 offset, std::size_t num_parameters, const u32* parameters) {
18 MICROPROFILE_SCOPE(MacroInterp);
15 Reset(); 19 Reset();
20
16 registers[1] = parameters[0]; 21 registers[1] = parameters[0];
17 this->parameters = std::move(parameters); 22
23 if (num_parameters > parameters_capacity) {
24 parameters_capacity = num_parameters;
25 this->parameters = std::make_unique<u32[]>(num_parameters);
26 }
27 std::memcpy(this->parameters.get(), parameters, num_parameters * sizeof(u32));
28 this->num_parameters = num_parameters;
18 29
19 // Execute the code until we hit an exit condition. 30 // Execute the code until we hit an exit condition.
20 bool keep_executing = true; 31 bool keep_executing = true;
@@ -23,7 +34,7 @@ void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
23 } 34 }
24 35
25 // Assert the the macro used all the input parameters 36 // Assert the the macro used all the input parameters
26 ASSERT(next_parameter_index == this->parameters.size()); 37 ASSERT(next_parameter_index == num_parameters);
27} 38}
28 39
29void MacroInterpreter::Reset() { 40void MacroInterpreter::Reset() {
@@ -31,7 +42,7 @@ void MacroInterpreter::Reset() {
31 pc = 0; 42 pc = 0;
32 delayed_pc = {}; 43 delayed_pc = {};
33 method_address.raw = 0; 44 method_address.raw = 0;
34 parameters.clear(); 45 num_parameters = 0;
35 // The next parameter index starts at 1, because $r1 already has the value of the first 46 // The next parameter index starts at 1, because $r1 already has the value of the first
36 // parameter. 47 // parameter.
37 next_parameter_index = 1; 48 next_parameter_index = 1;
@@ -225,7 +236,8 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
225} 236}
226 237
227u32 MacroInterpreter::FetchParameter() { 238u32 MacroInterpreter::FetchParameter() {
228 return parameters.at(next_parameter_index++); 239 ASSERT(next_parameter_index < num_parameters);
240 return parameters[next_parameter_index++];
229} 241}
230 242
231u32 MacroInterpreter::GetRegister(u32 register_id) const { 243u32 MacroInterpreter::GetRegister(u32 register_id) const {
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index cde360288..76b6a895b 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -25,7 +25,7 @@ public:
25 * @param offset Offset to start execution at. 25 * @param offset Offset to start execution at.
26 * @param parameters The parameters of the macro. 26 * @param parameters The parameters of the macro.
27 */ 27 */
28 void Execute(u32 offset, std::vector<u32> parameters); 28 void Execute(u32 offset, std::size_t num_parameters, const u32* parameters);
29 29
30private: 30private:
31 enum class Operation : u32 { 31 enum class Operation : u32 {
@@ -162,10 +162,12 @@ private:
162 MethodAddress method_address = {}; 162 MethodAddress method_address = {};
163 163
164 /// Input parameters of the current macro. 164 /// Input parameters of the current macro.
165 std::vector<u32> parameters; 165 std::unique_ptr<u32[]> parameters;
166 std::size_t num_parameters = 0;
167 std::size_t parameters_capacity = 0;
166 /// Index of the next parameter that will be fetched by the 'parm' instruction. 168 /// Index of the next parameter that will be fetched by the 'parm' instruction.
167 u32 next_parameter_index = 0; 169 u32 next_parameter_index = 0;
168 170
169 bool carry_flag{}; 171 bool carry_flag = false;
170}; 172};
171} // namespace Tegra 173} // namespace Tegra
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 5d8d126c1..bffae940c 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,13 +5,17 @@
5#include "common/alignment.h" 5#include "common/alignment.h"
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
9#include "core/hle/kernel/process.h"
10#include "core/hle/kernel/vm_manager.h"
8#include "core/memory.h" 11#include "core/memory.h"
9#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
10#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
11 14
12namespace Tegra { 15namespace Tegra {
13 16
14MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} { 17MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
18 : rasterizer{rasterizer}, system{system} {
15 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); 19 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
16 std::fill(page_table.attributes.begin(), page_table.attributes.end(), 20 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
17 Common::PageType::Unmapped); 21 Common::PageType::Unmapped);
@@ -49,6 +53,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
49 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; 53 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
50 54
51 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); 55 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
56 ASSERT(system.CurrentProcess()
57 ->VMManager()
58 .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
59 Kernel::MemoryAttribute::DeviceMapped)
60 .IsSuccess());
52 61
53 return gpu_addr; 62 return gpu_addr;
54} 63}
@@ -59,7 +68,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
59 const u64 aligned_size{Common::AlignUp(size, page_size)}; 68 const u64 aligned_size{Common::AlignUp(size, page_size)};
60 69
61 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); 70 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
62 71 ASSERT(system.CurrentProcess()
72 ->VMManager()
73 .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
74 Kernel::MemoryAttribute::DeviceMapped)
75 .IsSuccess());
63 return gpu_addr; 76 return gpu_addr;
64} 77}
65 78
@@ -68,9 +81,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
68 81
69 const u64 aligned_size{Common::AlignUp(size, page_size)}; 82 const u64 aligned_size{Common::AlignUp(size, page_size)};
70 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; 83 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
84 const auto cpu_addr = GpuToCpuAddress(gpu_addr);
85 ASSERT(cpu_addr);
71 86
72 rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); 87 rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
73 UnmapRange(gpu_addr, aligned_size); 88 UnmapRange(gpu_addr, aligned_size);
89 ASSERT(system.CurrentProcess()
90 ->VMManager()
91 .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped,
92 Kernel::MemoryAttribute::None)
93 .IsSuccess());
74 94
75 return gpu_addr; 95 return gpu_addr;
76} 96}
@@ -202,11 +222,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
202} 222}
203 223
204bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const { 224bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
205 const GPUVAddr end = start + size; 225 const std::size_t inner_size = size - 1;
226 const GPUVAddr end = start + inner_size;
206 const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start)); 227 const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
207 const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end)); 228 const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
208 const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); 229 const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
209 return range == size; 230 return range == inner_size;
210} 231}
211 232
212void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { 233void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 43a84bd52..aea010087 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -14,6 +14,10 @@ namespace VideoCore {
14class RasterizerInterface; 14class RasterizerInterface;
15} 15}
16 16
17namespace Core {
18class System;
19}
20
17namespace Tegra { 21namespace Tegra {
18 22
19/** 23/**
@@ -47,7 +51,7 @@ struct VirtualMemoryArea {
47 51
48class MemoryManager final { 52class MemoryManager final {
49public: 53public:
50 explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); 54 explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
51 ~MemoryManager(); 55 ~MemoryManager();
52 56
53 GPUVAddr AllocateSpace(u64 size, u64 align); 57 GPUVAddr AllocateSpace(u64 size, u64 align);
@@ -173,6 +177,8 @@ private:
173 Common::PageTable page_table{page_bits}; 177 Common::PageTable page_table{page_bits};
174 VMAMap vma_map; 178 VMAMap vma_map;
175 VideoCore::RasterizerInterface& rasterizer; 179 VideoCore::RasterizerInterface& rasterizer;
180
181 Core::System& system;
176}; 182};
177 183
178} // namespace Tegra 184} // namespace Tegra
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 3e91cbc83..084f85e67 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -25,8 +25,8 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
25 25
26 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual 26 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
27 // pixel values. 27 // pixel values.
28 const u32 tile_size_x{GetDefaultBlockWidth(format)}; 28 constexpr u32 tile_size_x{GetDefaultBlockWidth(format)};
29 const u32 tile_size_y{GetDefaultBlockHeight(format)}; 29 constexpr u32 tile_size_y{GetDefaultBlockHeight(format)};
30 30
31 if constexpr (morton_to_linear) { 31 if constexpr (morton_to_linear) {
32 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, 32 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
@@ -186,99 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
186 return morton_to_linear_fns[static_cast<std::size_t>(format)]; 186 return morton_to_linear_fns[static_cast<std::size_t>(format)];
187} 187}
188 188
189static u32 MortonInterleave128(u32 x, u32 y) {
190 // 128x128 Z-Order coordinate from 2D coordinates
191 static constexpr u32 xlut[] = {
192 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
193 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
194 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
195 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
196 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
197 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
198 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
199 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
200 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
201 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
202 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
203 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
204 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
205 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
206 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
207 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
208 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
209 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
210 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
211 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
212 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
213 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
214 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
215 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
216 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
217 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
218 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
219 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
220 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
221 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
222 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
223 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
224 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
225 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
226 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
227 };
228 static constexpr u32 ylut[] = {
229 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
230 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
231 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
232 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
233 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
234 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
235 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
236 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
237 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
238 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
239 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
240 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
241 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
242 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
243 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
244 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
245 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
246 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
247 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
248 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
249 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
250 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
251 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
252 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
253 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
254 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
255 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
256 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
257 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
258 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
259 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
260 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
261 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
262 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
263 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
264 };
265 return xlut[x % 128] + ylut[y % 128];
266}
267
268static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
269 // Calculates the offset of the position of the pixel in Morton order
270 // Framebuffer images are split into 128x128 tiles.
271
272 constexpr u32 block_height = 128;
273 const u32 coarse_x = x & ~127;
274
275 const u32 i = MortonInterleave128(x, y);
276
277 const u32 offset = coarse_x * block_height;
278
279 return (i + offset) * bytes_per_pixel;
280}
281
282void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, 189void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
283 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 190 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
284 u8* buffer, u8* addr) { 191 u8* buffer, u8* addr) {
@@ -286,23 +193,4 @@ void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stri
286 tile_width_spacing, buffer, addr); 193 tile_width_spacing, buffer, addr);
287} 194}
288 195
289void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
290 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
291 const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
292 u8* data_ptrs[2];
293 for (u32 y = 0; y < height; ++y) {
294 for (u32 x = 0; x < width; ++x) {
295 const u32 coarse_y = y & ~127;
296 const u32 morton_offset =
297 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
298 const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel;
299
300 data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset;
301 data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index];
302
303 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
304 }
305 }
306}
307
308} // namespace VideoCore 196} // namespace VideoCore
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index ee5b45555..b714a7e3f 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -15,7 +15,4 @@ void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat forma
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
16 u8* buffer, u8* addr); 16 u8* buffer, u8* addr);
17 17
18void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
19 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
20
21} // namespace VideoCore 18} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index d7b86df38..6b3f2d50a 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -10,6 +10,10 @@
10#include "video_core/engines/fermi_2d.h" 10#include "video_core/engines/fermi_2d.h"
11#include "video_core/gpu.h" 11#include "video_core/gpu.h"
12 12
13namespace Tegra {
14class MemoryManager;
15}
16
13namespace VideoCore { 17namespace VideoCore {
14 18
15enum class LoadCallbackStage { 19enum class LoadCallbackStage {
@@ -30,6 +34,9 @@ public:
30 /// Clear the current framebuffer 34 /// Clear the current framebuffer
31 virtual void Clear() = 0; 35 virtual void Clear() = 0;
32 36
37 /// Dispatches a compute shader invocation
38 virtual void DispatchCompute(GPUVAddr code_addr) = 0;
39
33 /// Notify rasterizer that all caches should be flushed to Switch memory 40 /// Notify rasterizer that all caches should be flushed to Switch memory
34 virtual void FlushAll() = 0; 41 virtual void FlushAll() = 0;
35 42
@@ -43,11 +50,16 @@ public:
43 /// and invalidated 50 /// and invalidated
44 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 51 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
45 52
53 /// Notify the rasterizer to send all written commands to the host GPU.
54 virtual void FlushCommands() = 0;
55
56 /// Notify rasterizer that a frame is about to finish
57 virtual void TickFrame() = 0;
58
46 /// Attempt to use a faster method to perform a surface copy 59 /// Attempt to use a faster method to perform a surface copy
47 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 60 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
48 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 61 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
49 const Common::Rectangle<u32>& src_rect, 62 const Tegra::Engines::Fermi2D::Config& copy_config) {
50 const Common::Rectangle<u32>& dst_rect) {
51 return false; 63 return false;
52 } 64 }
53 65
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 1d54c3723..af1bebc4f 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -36,8 +36,7 @@ public:
36 virtual ~RendererBase(); 36 virtual ~RendererBase();
37 37
38 /// Swap buffers (render frame) 38 /// Swap buffers (render frame)
39 virtual void SwapBuffers( 39 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
40 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
41 40
42 /// Initialize the renderer 41 /// Initialize the renderer
43 virtual bool Init() = 0; 42 virtual bool Init() = 0;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 2b9bd142e..f8a807c84 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,103 +2,71 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring>
6#include <memory> 5#include <memory>
7 6
8#include "common/alignment.h" 7#include <glad/glad.h>
9#include "core/core.h" 8
10#include "video_core/memory_manager.h" 9#include "common/assert.h"
10#include "common/microprofile.h"
11#include "video_core/rasterizer_interface.h"
11#include "video_core/renderer_opengl/gl_buffer_cache.h" 12#include "video_core/renderer_opengl/gl_buffer_cache.h"
12#include "video_core/renderer_opengl/gl_rasterizer.h" 13#include "video_core/renderer_opengl/gl_rasterizer.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h"
13 15
14namespace OpenGL { 16namespace OpenGL {
15 17
16CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, 18MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
17 std::size_t alignment, u8* host_ptr) 19
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, 20CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
19 alignment{alignment} {} 21 : VideoCommon::BufferBlock{cache_addr, size} {
20 22 gl_buffer.Create();
21OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) 23 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
22 : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
23
24GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
25 bool cache) {
26 std::lock_guard lock{mutex};
27 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
28
29 // Cache management is a big overhead, so only cache entries with a given size.
30 // TODO: Figure out which size is the best for given games.
31 cache &= size >= 2048;
32
33 const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
34 if (cache) {
35 auto entry = TryGet(host_ptr);
36 if (entry) {
37 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
38 return entry->GetOffset();
39 }
40 Unregister(entry);
41 }
42 }
43
44 AlignBuffer(alignment);
45 const GLintptr uploaded_offset = buffer_offset;
46
47 if (!host_ptr) {
48 return uploaded_offset;
49 }
50
51 std::memcpy(buffer_ptr, host_ptr, size);
52 buffer_ptr += size;
53 buffer_offset += size;
54
55 if (cache) {
56 auto entry = std::make_shared<CachedBufferEntry>(
57 *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
58 Register(entry);
59 }
60
61 return uploaded_offset;
62} 24}
63 25
64GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, 26CachedBufferBlock::~CachedBufferBlock() = default;
65 std::size_t alignment) { 27
66 std::lock_guard lock{mutex}; 28OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
67 AlignBuffer(alignment); 29 std::size_t stream_size)
68 std::memcpy(buffer_ptr, raw_pointer, size); 30 : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{
69 const GLintptr uploaded_offset = buffer_offset; 31 rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
32
33OGLBufferCache::~OGLBufferCache() = default;
70 34
71 buffer_ptr += size; 35Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
72 buffer_offset += size; 36 return std::make_shared<CachedBufferBlock>(cache_addr, size);
73 return uploaded_offset;
74} 37}
75 38
76bool OGLBufferCache::Map(std::size_t max_size) { 39void OGLBufferCache::WriteBarrier() {
77 bool invalidate; 40 glMemoryBarrier(GL_ALL_BARRIER_BITS);
78 std::tie(buffer_ptr, buffer_offset_base, invalidate) = 41}
79 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); 42
80 buffer_offset = buffer_offset_base; 43const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) {
44 return buffer->GetHandle();
45}
81 46
82 if (invalidate) { 47const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
83 InvalidateAll(); 48 static const GLuint null_buffer = 0;
84 } 49 return &null_buffer;
85 return invalidate;
86} 50}
87 51
88void OGLBufferCache::Unmap() { 52void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
89 stream_buffer.Unmap(buffer_offset - buffer_offset_base); 53 const u8* data) {
54 glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
55 static_cast<GLsizeiptr>(size), data);
90} 56}
91 57
92GLuint OGLBufferCache::GetHandle() const { 58void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
93 return stream_buffer.GetHandle(); 59 u8* data) {
60 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
61 glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
62 static_cast<GLsizeiptr>(size), data);
94} 63}
95 64
96void OGLBufferCache::AlignBuffer(std::size_t alignment) { 65void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
97 // Align the offset, not the mapped pointer 66 std::size_t dst_offset, std::size_t size) {
98 const GLintptr offset_aligned = 67 glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(),
99 static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); 68 static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset),
100 buffer_ptr += offset_aligned - buffer_offset; 69 static_cast<GLsizeiptr>(size));
101 buffer_offset = offset_aligned;
102} 70}
103 71
104} // namespace OpenGL 72} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index f2347581b..022e7bfa9 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -4,80 +4,63 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef>
8#include <memory> 7#include <memory>
9#include <tuple>
10 8
11#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/rasterizer_cache.h" 11#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_stream_buffer.h" 13#include "video_core/renderer_opengl/gl_stream_buffer.h"
15 14
15namespace Core {
16class System;
17}
18
16namespace OpenGL { 19namespace OpenGL {
17 20
21class OGLStreamBuffer;
18class RasterizerOpenGL; 22class RasterizerOpenGL;
19 23
20class CachedBufferEntry final : public RasterizerCacheObject { 24class CachedBufferBlock;
21public:
22 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
23 std::size_t alignment, u8* host_ptr);
24
25 VAddr GetCpuAddr() const override {
26 return cpu_addr;
27 }
28 25
29 std::size_t GetSizeInBytes() const override { 26using Buffer = std::shared_ptr<CachedBufferBlock>;
30 return size;
31 }
32
33 std::size_t GetSize() const {
34 return size;
35 }
36 27
37 GLintptr GetOffset() const { 28class CachedBufferBlock : public VideoCommon::BufferBlock {
38 return offset; 29public:
39 } 30 explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size);
31 ~CachedBufferBlock();
40 32
41 std::size_t GetAlignment() const { 33 const GLuint* GetHandle() const {
42 return alignment; 34 return &gl_buffer.handle;
43 } 35 }
44 36
45private: 37private:
46 VAddr cpu_addr{}; 38 OGLBuffer gl_buffer{};
47 std::size_t size{};
48 GLintptr offset{};
49 std::size_t alignment{};
50}; 39};
51 40
52class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { 41class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> {
53public: 42public:
54 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); 43 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
55 44 std::size_t stream_size);
56 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been 45 ~OGLBufferCache();
57 /// allocated.
58 GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
59 bool cache = true);
60 46
61 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. 47 const GLuint* GetEmptyBuffer(std::size_t) override;
62 GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
63 48
64 bool Map(std::size_t max_size); 49protected:
65 void Unmap(); 50 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
66 51
67 GLuint GetHandle() const; 52 void WriteBarrier() override;
68 53
69protected: 54 const GLuint* ToHandle(const Buffer& buffer) override;
70 void AlignBuffer(std::size_t alignment);
71 55
72 // We do not have to flush this cache as things in it are never modified by us. 56 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
73 void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} 57 const u8* data) override;
74 58
75private: 59 void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
76 OGLStreamBuffer stream_buffer; 60 u8* data) override;
77 61
78 u8* buffer_ptr = nullptr; 62 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
79 GLintptr buffer_offset = 0; 63 std::size_t dst_offset, std::size_t size) override;
80 GLintptr buffer_offset_base = 0;
81}; 64};
82 65
83} // namespace OpenGL 66} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index a48e14d2e..4f59a87b4 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -14,52 +14,64 @@
14namespace OpenGL { 14namespace OpenGL {
15 15
16namespace { 16namespace {
17
17template <typename T> 18template <typename T>
18T GetInteger(GLenum pname) { 19T GetInteger(GLenum pname) {
19 GLint temporary; 20 GLint temporary;
20 glGetIntegerv(pname, &temporary); 21 glGetIntegerv(pname, &temporary);
21 return static_cast<T>(temporary); 22 return static_cast<T>(temporary);
22} 23}
24
25bool TestProgram(const GLchar* glsl) {
26 const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &glsl)};
27 GLint link_status;
28 glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
29 glDeleteProgram(shader);
30 return link_status == GL_TRUE;
31}
32
23} // Anonymous namespace 33} // Anonymous namespace
24 34
25Device::Device() { 35Device::Device() {
26 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 36 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
37 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
27 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 38 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
28 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 39 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
40 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
41 GLAD_GL_NV_shader_thread_shuffle;
42 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
29 has_variable_aoffi = TestVariableAoffi(); 43 has_variable_aoffi = TestVariableAoffi();
30 has_component_indexing_bug = TestComponentIndexingBug(); 44 has_component_indexing_bug = TestComponentIndexingBug();
45 has_precise_bug = TestPreciseBug();
46
47 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
48 LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
49 LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
31} 50}
32 51
33Device::Device(std::nullptr_t) { 52Device::Device(std::nullptr_t) {
34 uniform_buffer_alignment = 0; 53 uniform_buffer_alignment = 0;
35 max_vertex_attributes = 16; 54 max_vertex_attributes = 16;
36 max_varyings = 15; 55 max_varyings = 15;
56 has_warp_intrinsics = true;
57 has_vertex_viewport_layer = true;
37 has_variable_aoffi = true; 58 has_variable_aoffi = true;
38 has_component_indexing_bug = false; 59 has_component_indexing_bug = false;
60 has_precise_bug = false;
39} 61}
40 62
41bool Device::TestVariableAoffi() { 63bool Device::TestVariableAoffi() {
42 const GLchar* AOFFI_TEST = R"(#version 430 core 64 return TestProgram(R"(#version 430 core
43// This is a unit test, please ignore me on apitrace bug reports. 65// This is a unit test, please ignore me on apitrace bug reports.
44uniform sampler2D tex; 66uniform sampler2D tex;
45uniform ivec2 variable_offset; 67uniform ivec2 variable_offset;
46out vec4 output_attribute; 68out vec4 output_attribute;
47void main() { 69void main() {
48 output_attribute = textureOffset(tex, vec2(0), variable_offset); 70 output_attribute = textureOffset(tex, vec2(0), variable_offset);
49} 71})");
50)";
51 const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &AOFFI_TEST)};
52 GLint link_status{};
53 glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
54 glDeleteProgram(shader);
55
56 const bool supported{link_status == GL_TRUE};
57 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", supported);
58 return supported;
59} 72}
60 73
61bool Device::TestComponentIndexingBug() { 74bool Device::TestComponentIndexingBug() {
62 constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}";
63 const GLchar* COMPONENT_TEST = R"(#version 430 core 75 const GLchar* COMPONENT_TEST = R"(#version 430 core
64layout (std430, binding = 0) buffer OutputBuffer { 76layout (std430, binding = 0) buffer OutputBuffer {
65 uint output_value; 77 uint output_value;
@@ -99,12 +111,21 @@ void main() {
99 GLuint result; 111 GLuint result;
100 glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); 112 glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
101 if (result != values.at(index)) { 113 if (result != values.at(index)) {
102 LOG_INFO(Render_OpenGL, log_message, true);
103 return true; 114 return true;
104 } 115 }
105 } 116 }
106 LOG_INFO(Render_OpenGL, log_message, false);
107 return false; 117 return false;
108} 118}
109 119
120bool Device::TestPreciseBug() {
121 return !TestProgram(R"(#version 430 core
122in vec3 coords;
123out float out_value;
124uniform sampler2DShadow tex;
125void main() {
126 precise float tmp_value = vec4(texture(tex, coords)).x;
127 out_value = tmp_value;
128})");
129}
130
110} // namespace OpenGL 131} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8c8c93760..ba6dcd3be 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -18,6 +18,10 @@ public:
18 return uniform_buffer_alignment; 18 return uniform_buffer_alignment;
19 } 19 }
20 20
21 std::size_t GetShaderStorageBufferAlignment() const {
22 return shader_storage_alignment;
23 }
24
21 u32 GetMaxVertexAttributes() const { 25 u32 GetMaxVertexAttributes() const {
22 return max_vertex_attributes; 26 return max_vertex_attributes;
23 } 27 }
@@ -26,6 +30,14 @@ public:
26 return max_varyings; 30 return max_varyings;
27 } 31 }
28 32
33 bool HasWarpIntrinsics() const {
34 return has_warp_intrinsics;
35 }
36
37 bool HasVertexViewportLayer() const {
38 return has_vertex_viewport_layer;
39 }
40
29 bool HasVariableAoffi() const { 41 bool HasVariableAoffi() const {
30 return has_variable_aoffi; 42 return has_variable_aoffi;
31 } 43 }
@@ -34,15 +46,24 @@ public:
34 return has_component_indexing_bug; 46 return has_component_indexing_bug;
35 } 47 }
36 48
49 bool HasPreciseBug() const {
50 return has_precise_bug;
51 }
52
37private: 53private:
38 static bool TestVariableAoffi(); 54 static bool TestVariableAoffi();
39 static bool TestComponentIndexingBug(); 55 static bool TestComponentIndexingBug();
56 static bool TestPreciseBug();
40 57
41 std::size_t uniform_buffer_alignment{}; 58 std::size_t uniform_buffer_alignment{};
59 std::size_t shader_storage_alignment{};
42 u32 max_vertex_attributes{}; 60 u32 max_vertex_attributes{};
43 u32 max_varyings{}; 61 u32 max_varyings{};
62 bool has_warp_intrinsics{};
63 bool has_vertex_viewport_layer{};
44 bool has_variable_aoffi{}; 64 bool has_variable_aoffi{};
45 bool has_component_indexing_bug{}; 65 bool has_component_indexing_bug{};
66 bool has_precise_bug{};
46}; 67};
47 68
48} // namespace OpenGL 69} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
new file mode 100644
index 000000000..7c926bd48
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
@@ -0,0 +1,75 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/cityhash.h"
8#include "common/scope_exit.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
11#include "video_core/renderer_opengl/gl_state.h"
12
13namespace OpenGL {
14
15using Maxwell = Tegra::Engines::Maxwell3D::Regs;
16
17FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
18
19FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
20
21GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
22 const auto [entry, is_cache_miss] = cache.try_emplace(key);
23 auto& framebuffer{entry->second};
24 if (is_cache_miss) {
25 framebuffer = CreateFramebuffer(key);
26 }
27 return framebuffer.handle;
28}
29
30OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
31 OGLFramebuffer framebuffer;
32 framebuffer.Create();
33
34 // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
35 local_state.draw.draw_framebuffer = framebuffer.handle;
36 local_state.ApplyFramebufferState();
37
38 if (key.is_single_buffer) {
39 if (key.color_attachments[0] != GL_NONE && key.colors[0]) {
40 key.colors[0]->Attach(key.color_attachments[0], GL_DRAW_FRAMEBUFFER);
41 glDrawBuffer(key.color_attachments[0]);
42 } else {
43 glDrawBuffer(GL_NONE);
44 }
45 } else {
46 for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
47 if (key.colors[index]) {
48 key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
49 GL_DRAW_FRAMEBUFFER);
50 }
51 }
52 glDrawBuffers(key.colors_count, key.color_attachments.data());
53 }
54
55 if (key.zeta) {
56 key.zeta->Attach(key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT,
57 GL_DRAW_FRAMEBUFFER);
58 }
59
60 return framebuffer;
61}
62
63std::size_t FramebufferCacheKey::Hash() const {
64 static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct");
65 return static_cast<std::size_t>(
66 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
67}
68
69bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const {
70 return std::tie(is_single_buffer, stencil_enable, colors_count, color_attachments, colors,
71 zeta) == std::tie(rhs.is_single_buffer, rhs.stencil_enable, rhs.colors_count,
72 rhs.color_attachments, rhs.colors, rhs.zeta);
73}
74
75} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
new file mode 100644
index 000000000..a3a996353
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
@@ -0,0 +1,68 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <unordered_map>
10
11#include <glad/glad.h>
12
13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/renderer_opengl/gl_resource_manager.h"
16#include "video_core/renderer_opengl/gl_state.h"
17#include "video_core/renderer_opengl/gl_texture_cache.h"
18
19namespace OpenGL {
20
21struct alignas(sizeof(u64)) FramebufferCacheKey {
22 bool is_single_buffer = false;
23 bool stencil_enable = false;
24 u16 colors_count = 0;
25
26 std::array<GLenum, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_attachments{};
27 std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
28 View zeta;
29
30 std::size_t Hash() const;
31
32 bool operator==(const FramebufferCacheKey& rhs) const;
33
34 bool operator!=(const FramebufferCacheKey& rhs) const {
35 return !operator==(rhs);
36 }
37};
38
39} // namespace OpenGL
40
41namespace std {
42
43template <>
44struct hash<OpenGL::FramebufferCacheKey> {
45 std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
46 return k.Hash();
47 }
48};
49
50} // namespace std
51
52namespace OpenGL {
53
54class FramebufferCacheOpenGL {
55public:
56 FramebufferCacheOpenGL();
57 ~FramebufferCacheOpenGL();
58
59 GLuint GetFramebuffer(const FramebufferCacheKey& key);
60
61private:
62 OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
63
64 OpenGLState local_state;
65 std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
66};
67
68} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
deleted file mode 100644
index d5e385151..000000000
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <glad/glad.h>
6
7#include "common/logging/log.h"
8#include "core/core.h"
9#include "video_core/memory_manager.h"
10#include "video_core/renderer_opengl/gl_global_cache.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
13#include "video_core/renderer_opengl/utils.h"
14
15namespace OpenGL {
16
17CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
19 max_size{max_size} {
20 buffer.Create();
21 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
22}
23
24CachedGlobalRegion::~CachedGlobalRegion() = default;
25
26void CachedGlobalRegion::Reload(u32 size_) {
27 size = size_;
28 if (size > max_size) {
29 size = max_size;
30 LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
31 max_size);
32 }
33 glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
34}
35
36void CachedGlobalRegion::Flush() {
37 LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr);
38 glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
39}
40
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
42 const auto search{reserve.find(addr)};
43 if (search == reserve.end()) {
44 return {};
45 }
46 return search->second;
47}
48
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
50 u32 size) {
51 GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
52 if (!region) {
53 // No reserved surface available, create a new one and reserve it
54 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
55 const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
56 ASSERT(cpu_addr);
57
58 region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
59 ReserveGlobalRegion(region);
60 }
61 region->Reload(size);
62 return region;
63}
64
65void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
66 reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
67}
68
69GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
70 : RasterizerCache{rasterizer} {
71 GLint max_ssbo_size_;
72 glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
73 max_ssbo_size = static_cast<u32>(max_ssbo_size_);
74}
75
76GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
77 const GLShader::GlobalMemoryEntry& global_region,
78 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
79 std::lock_guard lock{mutex};
80
81 auto& gpu{Core::System::GetInstance().GPU()};
82 auto& memory_manager{gpu.MemoryManager()};
83 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
84 const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
85 global_region.GetCbufOffset()};
86 const auto actual_addr{memory_manager.Read<u64>(addr)};
87 const auto size{memory_manager.Read<u32>(addr + 8)};
88
89 // Look up global region in the cache based on address
90 const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
91 GlobalRegion region{TryGet(host_ptr)};
92
93 if (!region) {
94 // No global region found - create a new one
95 region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
96 Register(region);
97 }
98
99 return region;
100}
101
102} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
deleted file mode 100644
index 2d467a240..000000000
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ /dev/null
@@ -1,82 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9
10#include <glad/glad.h>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/rasterizer_cache.h"
16#include "video_core/renderer_opengl/gl_resource_manager.h"
17
18namespace OpenGL {
19
20namespace GLShader {
21class GlobalMemoryEntry;
22}
23
24class RasterizerOpenGL;
25class CachedGlobalRegion;
26using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
27
28class CachedGlobalRegion final : public RasterizerCacheObject {
29public:
30 explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
31 ~CachedGlobalRegion();
32
33 VAddr GetCpuAddr() const override {
34 return cpu_addr;
35 }
36
37 std::size_t GetSizeInBytes() const override {
38 return size;
39 }
40
41 /// Gets the GL program handle for the buffer
42 GLuint GetBufferHandle() const {
43 return buffer.handle;
44 }
45
46 /// Reloads the global region from guest memory
47 void Reload(u32 size_);
48
49 void Flush();
50
51private:
52 VAddr cpu_addr{};
53 u8* host_ptr{};
54 u32 size{};
55 u32 max_size{};
56
57 OGLBuffer buffer;
58};
59
60class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
61public:
62 explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
63
64 /// Gets the current specified shader stage program
65 GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
66 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
67
68protected:
69 void FlushObjectInner(const GlobalRegion& object) override {
70 object->Flush();
71 }
72
73private:
74 GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
75 GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
76 void ReserveGlobalRegion(GlobalRegion region);
77
78 std::unordered_map<CacheAddr, GlobalRegion> reserve;
79 u32 max_ssbo_size{};
80};
81
82} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index d77426067..4e266cdad 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <bitset>
7#include <memory> 8#include <memory>
8#include <string> 9#include <string>
9#include <string_view> 10#include <string_view>
@@ -19,7 +20,9 @@
19#include "core/core.h" 20#include "core/core.h"
20#include "core/hle/kernel/process.h" 21#include "core/hle/kernel/process.h"
21#include "core/settings.h" 22#include "core/settings.h"
23#include "video_core/engines/kepler_compute.h"
22#include "video_core/engines/maxwell_3d.h" 24#include "video_core/engines/maxwell_3d.h"
25#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 26#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_cache.h" 27#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_gen.h" 28#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -29,8 +32,10 @@
29namespace OpenGL { 32namespace OpenGL {
30 33
31using Maxwell = Tegra::Engines::Maxwell3D::Regs; 34using Maxwell = Tegra::Engines::Maxwell3D::Regs;
32using PixelFormat = VideoCore::Surface::PixelFormat; 35
33using SurfaceType = VideoCore::Surface::SurfaceType; 36using VideoCore::Surface::PixelFormat;
37using VideoCore::Surface::SurfaceTarget;
38using VideoCore::Surface::SurfaceType;
34 39
35MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192)); 40MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
36MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192)); 41MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
@@ -78,36 +83,31 @@ struct DrawParameters {
78 } 83 }
79}; 84};
80 85
81struct FramebufferCacheKey { 86static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
82 bool is_single_buffer = false; 87 const GLShader::ConstBufferEntry& entry) {
83 bool stencil_enable = false; 88 if (!entry.IsIndirect()) {
84 89 return entry.GetSize();
85 std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{};
86 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{};
87 u32 colors_count = 0;
88
89 GLuint zeta = 0;
90
91 auto Tie() const {
92 return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count,
93 zeta);
94 } 90 }
95 91
96 bool operator<(const FramebufferCacheKey& rhs) const { 92 if (buffer.size > Maxwell::MaxConstBufferSize) {
97 return Tie() < rhs.Tie(); 93 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
94 Maxwell::MaxConstBufferSize);
95 return Maxwell::MaxConstBufferSize;
98 } 96 }
99}; 97
98 return buffer.size;
99}
100 100
101RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 101RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
102 ScreenInfo& info) 102 ScreenInfo& info)
103 : res_cache{*this}, shader_cache{*this, system, emu_window, device}, 103 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
104 global_cache{*this}, system{system}, screen_info{info}, 104 system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
105 buffer_cache(*this, STREAM_BUFFER_SIZE) {
106 OpenGLState::ApplyDefaultState(); 105 OpenGLState::ApplyDefaultState();
107 106
108 shader_program_manager = std::make_unique<GLShader::ProgramManager>(); 107 shader_program_manager = std::make_unique<GLShader::ProgramManager>();
109 state.draw.shader_program = 0; 108 state.draw.shader_program = 0;
110 state.Apply(); 109 state.Apply();
110 clear_framebuffer.Create();
111 111
112 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); 112 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
113 CheckExtensions(); 113 CheckExtensions();
@@ -121,21 +121,16 @@ void RasterizerOpenGL::CheckExtensions() {
121 Render_OpenGL, 121 Render_OpenGL,
122 "Anisotropic filter is not supported! This can cause graphical issues in some games."); 122 "Anisotropic filter is not supported! This can cause graphical issues in some games.");
123 } 123 }
124 if (!GLAD_GL_ARB_buffer_storage) {
125 LOG_WARNING(
126 Render_OpenGL,
127 "Buffer storage control is not supported! This can cause performance degradation.");
128 }
129} 124}
130 125
131GLuint RasterizerOpenGL::SetupVertexFormat() { 126GLuint RasterizerOpenGL::SetupVertexFormat() {
132 auto& gpu = system.GPU().Maxwell3D(); 127 auto& gpu = system.GPU().Maxwell3D();
133 const auto& regs = gpu.regs; 128 const auto& regs = gpu.regs;
134 129
135 if (!gpu.dirty_flags.vertex_attrib_format) { 130 if (!gpu.dirty.vertex_attrib_format) {
136 return state.draw.vertex_array; 131 return state.draw.vertex_array;
137 } 132 }
138 gpu.dirty_flags.vertex_attrib_format = false; 133 gpu.dirty.vertex_attrib_format = false;
139 134
140 MICROPROFILE_SCOPE(OpenGL_VAO); 135 MICROPROFILE_SCOPE(OpenGL_VAO);
141 136
@@ -152,8 +147,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
152 state.draw.vertex_array = vao; 147 state.draw.vertex_array = vao;
153 state.ApplyVertexArrayState(); 148 state.ApplyVertexArrayState();
154 149
155 glVertexArrayElementBuffer(vao, buffer_cache.GetHandle());
156
157 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. 150 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
158 // Enables the first 16 vertex attributes always, as we don't know which ones are actually 151 // Enables the first 16 vertex attributes always, as we don't know which ones are actually
159 // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 152 // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
@@ -191,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
191 } 184 }
192 185
193 // Rebinding the VAO invalidates the vertex buffer bindings. 186 // Rebinding the VAO invalidates the vertex buffer bindings.
194 gpu.dirty_flags.vertex_array.set(); 187 gpu.dirty.ResetVertexArrays();
195 188
196 state.draw.vertex_array = vao_entry.handle; 189 state.draw.vertex_array = vao_entry.handle;
197 return vao_entry.handle; 190 return vao_entry.handle;
@@ -199,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
199 192
200void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { 193void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
201 auto& gpu = system.GPU().Maxwell3D(); 194 auto& gpu = system.GPU().Maxwell3D();
202 const auto& regs = gpu.regs; 195 if (!gpu.dirty.vertex_array_buffers)
203
204 if (gpu.dirty_flags.vertex_array.none())
205 return; 196 return;
197 gpu.dirty.vertex_array_buffers = false;
198
199 const auto& regs = gpu.regs;
206 200
207 MICROPROFILE_SCOPE(OpenGL_VB); 201 MICROPROFILE_SCOPE(OpenGL_VB);
208 202
209 // Upload all guest vertex arrays sequentially to our buffer 203 // Upload all guest vertex arrays sequentially to our buffer
210 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 204 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
211 if (!gpu.dirty_flags.vertex_array[index]) 205 if (!gpu.dirty.vertex_array[index])
212 continue; 206 continue;
207 gpu.dirty.vertex_array[index] = false;
208 gpu.dirty.vertex_instance[index] = false;
213 209
214 const auto& vertex_array = regs.vertex_array[index]; 210 const auto& vertex_array = regs.vertex_array[index];
215 if (!vertex_array.IsEnabled()) 211 if (!vertex_array.IsEnabled())
@@ -220,11 +216,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
220 216
221 ASSERT(end > start); 217 ASSERT(end > start);
222 const u64 size = end - start + 1; 218 const u64 size = end - start + 1;
223 const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); 219 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
224 220
225 // Bind the vertex array to the buffer at the current offset. 221 // Bind the vertex array to the buffer at the current offset.
226 glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset, 222 vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset,
227 vertex_array.stride); 223 vertex_array.stride);
228 224
229 if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { 225 if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
230 // Enable vertex buffer instancing with the specified divisor. 226 // Enable vertex buffer instancing with the specified divisor.
@@ -234,11 +230,47 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
234 glVertexArrayBindingDivisor(vao, index, 0); 230 glVertexArrayBindingDivisor(vao, index, 0);
235 } 231 }
236 } 232 }
233}
234
235void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
236 auto& gpu = system.GPU().Maxwell3D();
237
238 if (!gpu.dirty.vertex_instances)
239 return;
240 gpu.dirty.vertex_instances = false;
241
242 const auto& regs = gpu.regs;
243 // Upload all guest vertex arrays sequentially to our buffer
244 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
245 if (!gpu.dirty.vertex_instance[index])
246 continue;
237 247
238 gpu.dirty_flags.vertex_array.reset(); 248 gpu.dirty.vertex_instance[index] = false;
249
250 if (regs.instanced_arrays.IsInstancingEnabled(index) &&
251 regs.vertex_array[index].divisor != 0) {
252 // Enable vertex buffer instancing with the specified divisor.
253 glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
254 } else {
255 // Disable the vertex buffer instancing.
256 glVertexArrayBindingDivisor(vao, index, 0);
257 }
258 }
259}
260
261GLintptr RasterizerOpenGL::SetupIndexBuffer() {
262 if (accelerate_draw != AccelDraw::Indexed) {
263 return 0;
264 }
265 MICROPROFILE_SCOPE(OpenGL_Index);
266 const auto& regs = system.GPU().Maxwell3D().regs;
267 const std::size_t size = CalculateIndexBufferSize();
268 const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
269 vertex_array_pushbuffer.SetIndexBuffer(buffer);
270 return offset;
239} 271}
240 272
241DrawParameters RasterizerOpenGL::SetupDraw() { 273DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) {
242 const auto& gpu = system.GPU().Maxwell3D(); 274 const auto& gpu = system.GPU().Maxwell3D();
243 const auto& regs = gpu.regs; 275 const auto& regs = gpu.regs;
244 const bool is_indexed = accelerate_draw == AccelDraw::Indexed; 276 const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
@@ -250,11 +282,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
250 params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); 282 params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
251 283
252 if (is_indexed) { 284 if (is_indexed) {
253 MICROPROFILE_SCOPE(OpenGL_Index);
254 params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); 285 params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
255 params.count = regs.index_array.count; 286 params.count = regs.index_array.count;
256 params.index_buffer_offset = 287 params.index_buffer_offset = index_buffer_offset;
257 buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
258 params.base_vertex = static_cast<GLint>(regs.vb_element_base); 288 params.base_vertex = static_cast<GLint>(regs.vb_element_base);
259 } else { 289 } else {
260 params.count = regs.vertex_buffer.count; 290 params.count = regs.vertex_buffer.count;
@@ -270,10 +300,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
270 BaseBindings base_bindings; 300 BaseBindings base_bindings;
271 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 301 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
272 302
273 // Prepare packed bindings
274 bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
275 bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
276
277 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 303 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
278 const auto& shader_config = gpu.regs.shader_config[index]; 304 const auto& shader_config = gpu.regs.shader_config[index];
279 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; 305 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -294,16 +320,21 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
294 320
295 GLShader::MaxwellUniformData ubo{}; 321 GLShader::MaxwellUniformData ubo{};
296 ubo.SetFromRegs(gpu, stage); 322 ubo.SetFromRegs(gpu, stage);
297 const GLintptr offset = 323 const auto [buffer, offset] =
298 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); 324 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
299 325
300 // Bind the emulation info buffer 326 // Bind the emulation info buffer
301 bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, 327 bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
302 static_cast<GLsizeiptr>(sizeof(ubo)));
303 328
304 Shader shader{shader_cache.GetStageProgram(program)}; 329 Shader shader{shader_cache.GetStageProgram(program)};
305 const auto [program_handle, next_bindings] = 330
306 shader->GetProgramHandle(primitive_mode, base_bindings); 331 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
332 SetupDrawConstBuffers(stage_enum, shader);
333 SetupDrawGlobalMemory(stage_enum, shader);
334 const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)};
335
336 const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
337 const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
307 338
308 switch (program) { 339 switch (program) {
309 case Maxwell::ShaderProgram::VertexA: 340 case Maxwell::ShaderProgram::VertexA:
@@ -321,11 +352,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
321 shader_config.enable.Value(), shader_config.offset); 352 shader_config.enable.Value(), shader_config.offset);
322 } 353 }
323 354
324 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
325 SetupDrawConstBuffers(stage_enum, shader);
326 SetupGlobalRegions(stage_enum, shader);
327 SetupTextures(stage_enum, shader, base_bindings);
328
329 // Workaround for Intel drivers. 355 // Workaround for Intel drivers.
330 // When a clip distance is enabled but not set in the shader it crops parts of the screen 356 // When a clip distance is enabled but not set in the shader it crops parts of the screen
331 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the 357 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -343,50 +369,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
343 base_bindings = next_bindings; 369 base_bindings = next_bindings;
344 } 370 }
345 371
346 bind_ubo_pushbuffer.Bind();
347 bind_ssbo_pushbuffer.Bind();
348
349 SyncClipEnabled(clip_distances); 372 SyncClipEnabled(clip_distances);
350 373
351 gpu.dirty_flags.shaders = false; 374 gpu.dirty.shaders = false;
352}
353
354void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
355 OpenGLState& current_state) {
356 const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey);
357 auto& framebuffer = entry->second;
358
359 if (is_cache_miss)
360 framebuffer.Create();
361
362 current_state.draw.draw_framebuffer = framebuffer.handle;
363 current_state.ApplyFramebufferState();
364
365 if (!is_cache_miss)
366 return;
367
368 if (fbkey.is_single_buffer) {
369 if (fbkey.color_attachments[0] != GL_NONE) {
370 glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0],
371 0);
372 }
373 glDrawBuffer(fbkey.color_attachments[0]);
374 } else {
375 for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
376 if (fbkey.colors[index]) {
377 glFramebufferTexture(GL_DRAW_FRAMEBUFFER,
378 GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
379 fbkey.colors[index], 0);
380 }
381 }
382 glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data());
383 }
384
385 if (fbkey.zeta) {
386 GLenum zeta_attachment =
387 fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
388 glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0);
389 }
390} 375}
391 376
392std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 377std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -469,18 +454,22 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
469 454
470 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, 455 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
471 single_color_target}; 456 single_color_target};
472 if (fb_config_state == current_framebuffer_config_state && 457 if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
473 gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
474 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or 458 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
475 // single color targets). This is done because the guest registers may not change but the 459 // single color targets). This is done because the guest registers may not change but the
476 // host framebuffer may contain different attachments 460 // host framebuffer may contain different attachments
477 return current_depth_stencil_usage; 461 return current_depth_stencil_usage;
478 } 462 }
463 gpu.dirty.render_settings = false;
479 current_framebuffer_config_state = fb_config_state; 464 current_framebuffer_config_state = fb_config_state;
480 465
481 Surface depth_surface; 466 texture_cache.GuardRenderTargets(true);
467
468 View depth_surface{};
482 if (using_depth_fb) { 469 if (using_depth_fb) {
483 depth_surface = res_cache.GetDepthBufferSurface(preserve_contents); 470 depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
471 } else {
472 texture_cache.SetEmptyDepthBuffer();
484 } 473 }
485 474
486 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); 475 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
@@ -493,13 +482,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
493 if (using_color_fb) { 482 if (using_color_fb) {
494 if (single_color_target) { 483 if (single_color_target) {
495 // Used when just a single color attachment is enabled, e.g. for clearing a color buffer 484 // Used when just a single color attachment is enabled, e.g. for clearing a color buffer
496 Surface color_surface = 485 View color_surface{
497 res_cache.GetColorBufferSurface(*single_color_target, preserve_contents); 486 texture_cache.GetColorBufferSurface(*single_color_target, preserve_contents)};
498 487
499 if (color_surface) { 488 if (color_surface) {
500 // Assume that a surface will be written to if it is used as a framebuffer, even if 489 // Assume that a surface will be written to if it is used as a framebuffer, even if
501 // the shader doesn't actually write to it. 490 // the shader doesn't actually write to it.
502 color_surface->MarkAsModified(true, res_cache); 491 texture_cache.MarkColorBufferInUse(*single_color_target);
503 // Workaround for and issue in nvidia drivers 492 // Workaround for and issue in nvidia drivers
504 // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ 493 // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
505 state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; 494 state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
@@ -508,16 +497,21 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
508 fbkey.is_single_buffer = true; 497 fbkey.is_single_buffer = true;
509 fbkey.color_attachments[0] = 498 fbkey.color_attachments[0] =
510 GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target); 499 GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target);
511 fbkey.colors[0] = color_surface != nullptr ? color_surface->Texture().handle : 0; 500 fbkey.colors[0] = color_surface;
501 for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
502 if (index != *single_color_target) {
503 texture_cache.SetEmptyColorBuffer(index);
504 }
505 }
512 } else { 506 } else {
513 // Multiple color attachments are enabled 507 // Multiple color attachments are enabled
514 for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { 508 for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
515 Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); 509 View color_surface{texture_cache.GetColorBufferSurface(index, preserve_contents)};
516 510
517 if (color_surface) { 511 if (color_surface) {
518 // Assume that a surface will be written to if it is used as a framebuffer, even 512 // Assume that a surface will be written to if it is used as a framebuffer, even
519 // if the shader doesn't actually write to it. 513 // if the shader doesn't actually write to it.
520 color_surface->MarkAsModified(true, res_cache); 514 texture_cache.MarkColorBufferInUse(index);
521 // Enable sRGB only for supported formats 515 // Enable sRGB only for supported formats
522 // Workaround for and issue in nvidia drivers 516 // Workaround for and issue in nvidia drivers
523 // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ 517 // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
@@ -527,8 +521,7 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
527 521
528 fbkey.color_attachments[index] = 522 fbkey.color_attachments[index] =
529 GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); 523 GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
530 fbkey.colors[index] = 524 fbkey.colors[index] = color_surface;
531 color_surface != nullptr ? color_surface->Texture().handle : 0;
532 } 525 }
533 fbkey.is_single_buffer = false; 526 fbkey.is_single_buffer = false;
534 fbkey.colors_count = regs.rt_control.count; 527 fbkey.colors_count = regs.rt_control.count;
@@ -541,26 +534,84 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
541 if (depth_surface) { 534 if (depth_surface) {
542 // Assume that a surface will be written to if it is used as a framebuffer, even if 535 // Assume that a surface will be written to if it is used as a framebuffer, even if
543 // the shader doesn't actually write to it. 536 // the shader doesn't actually write to it.
544 depth_surface->MarkAsModified(true, res_cache); 537 texture_cache.MarkDepthBufferInUse();
545 538
546 fbkey.zeta = depth_surface->Texture().handle; 539 fbkey.zeta = depth_surface;
547 fbkey.stencil_enable = regs.stencil_enable && 540 fbkey.stencil_enable = depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
548 depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
549 } 541 }
550 542
551 SetupCachedFramebuffer(fbkey, current_state); 543 texture_cache.GuardRenderTargets(false);
544
545 current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey);
552 SyncViewport(current_state); 546 SyncViewport(current_state);
553 547
554 return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; 548 return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
555} 549}
556 550
551void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
552 bool using_depth_fb, bool using_stencil_fb) {
553 auto& gpu = system.GPU().Maxwell3D();
554 const auto& regs = gpu.regs;
555
556 texture_cache.GuardRenderTargets(true);
557 View color_surface{};
558 if (using_color_fb) {
559 color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
560 }
561 View depth_surface{};
562 if (using_depth_fb || using_stencil_fb) {
563 depth_surface = texture_cache.GetDepthBufferSurface(false);
564 }
565 texture_cache.GuardRenderTargets(false);
566
567 current_state.draw.draw_framebuffer = clear_framebuffer.handle;
568 current_state.ApplyFramebufferState();
569
570 if (color_surface) {
571 color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
572 } else {
573 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
574 }
575
576 if (depth_surface) {
577 const auto& params = depth_surface->GetSurfaceParams();
578 switch (params.type) {
579 case VideoCore::Surface::SurfaceType::Depth:
580 depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
581 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
582 break;
583 case VideoCore::Surface::SurfaceType::DepthStencil:
584 depth_surface->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
585 break;
586 default:
587 UNIMPLEMENTED();
588 }
589 } else {
590 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
591 0);
592 }
593}
594
557void RasterizerOpenGL::Clear() { 595void RasterizerOpenGL::Clear() {
558 const auto& regs = system.GPU().Maxwell3D().regs; 596 const auto& maxwell3d = system.GPU().Maxwell3D();
597
598 if (!maxwell3d.ShouldExecute()) {
599 return;
600 }
601
602 const auto& regs = maxwell3d.regs;
559 bool use_color{}; 603 bool use_color{};
560 bool use_depth{}; 604 bool use_depth{};
561 bool use_stencil{}; 605 bool use_stencil{};
562 606
563 OpenGLState clear_state; 607 OpenGLState prev_state{OpenGLState::GetCurState()};
608 SCOPE_EXIT({
609 prev_state.AllDirty();
610 prev_state.Apply();
611 });
612
613 OpenGLState clear_state{OpenGLState::GetCurState()};
614 clear_state.SetDefaultViewports();
564 if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || 615 if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
565 regs.clear_buffers.A) { 616 regs.clear_buffers.A) {
566 use_color = true; 617 use_color = true;
@@ -580,11 +631,13 @@ void RasterizerOpenGL::Clear() {
580 // true. 631 // true.
581 clear_state.depth.test_enabled = true; 632 clear_state.depth.test_enabled = true;
582 clear_state.depth.test_func = GL_ALWAYS; 633 clear_state.depth.test_func = GL_ALWAYS;
634 clear_state.depth.write_mask = GL_TRUE;
583 } 635 }
584 if (regs.clear_buffers.S) { 636 if (regs.clear_buffers.S) {
585 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); 637 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
586 use_stencil = true; 638 use_stencil = true;
587 clear_state.stencil.test_enabled = true; 639 clear_state.stencil.test_enabled = true;
640
588 if (regs.clear_flags.stencil) { 641 if (regs.clear_flags.stencil) {
589 // Stencil affects the clear so fill it with the used masks 642 // Stencil affects the clear so fill it with the used masks
590 clear_state.stencil.front.test_func = GL_ALWAYS; 643 clear_state.stencil.front.test_func = GL_ALWAYS;
@@ -616,8 +669,9 @@ void RasterizerOpenGL::Clear() {
616 return; 669 return;
617 } 670 }
618 671
619 const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( 672 ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
620 clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); 673
674 SyncViewport(clear_state);
621 if (regs.clear_flags.scissor) { 675 if (regs.clear_flags.scissor) {
622 SyncScissorTest(clear_state); 676 SyncScissorTest(clear_state);
623 } 677 }
@@ -626,20 +680,18 @@ void RasterizerOpenGL::Clear() {
626 clear_state.EmulateViewportWithScissor(); 680 clear_state.EmulateViewportWithScissor();
627 } 681 }
628 682
629 clear_state.ApplyColorMask(); 683 clear_state.AllDirty();
630 clear_state.ApplyDepth(); 684 clear_state.Apply();
631 clear_state.ApplyStencilTest();
632 clear_state.ApplyViewport();
633 685
634 if (use_color) { 686 if (use_color) {
635 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); 687 glClearBufferfv(GL_COLOR, 0, regs.clear_color);
636 } 688 }
637 689
638 if (clear_depth && clear_stencil) { 690 if (use_depth && use_stencil) {
639 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); 691 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
640 } else if (clear_depth) { 692 } else if (use_depth) {
641 glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth); 693 glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
642 } else if (clear_stencil) { 694 } else if (use_stencil) {
643 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); 695 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
644 } 696 }
645} 697}
@@ -650,9 +702,11 @@ void RasterizerOpenGL::DrawArrays() {
650 702
651 MICROPROFILE_SCOPE(OpenGL_Drawing); 703 MICROPROFILE_SCOPE(OpenGL_Drawing);
652 auto& gpu = system.GPU().Maxwell3D(); 704 auto& gpu = system.GPU().Maxwell3D();
653 const auto& regs = gpu.regs;
654 705
655 ConfigureFramebuffers(state); 706 if (!gpu.ShouldExecute()) {
707 return;
708 }
709
656 SyncColorMask(); 710 SyncColorMask();
657 SyncFragmentColorClampState(); 711 SyncFragmentColorClampState();
658 SyncMultiSampleState(); 712 SyncMultiSampleState();
@@ -684,31 +738,102 @@ void RasterizerOpenGL::DrawArrays() {
684 Maxwell::MaxShaderStage; 738 Maxwell::MaxShaderStage;
685 739
686 // Add space for at least 18 constant buffers 740 // Add space for at least 18 constant buffers
687 buffer_size += 741 buffer_size += Maxwell::MaxConstBuffers *
688 Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); 742 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
689 743
690 const bool invalidate = buffer_cache.Map(buffer_size); 744 // Prepare the vertex array.
691 if (invalidate) { 745 buffer_cache.Map(buffer_size);
692 // As all cached buffers are invalidated, we need to recheck their state.
693 gpu.dirty_flags.vertex_array.set();
694 }
695 746
747 // Prepare vertex array format.
696 const GLuint vao = SetupVertexFormat(); 748 const GLuint vao = SetupVertexFormat();
749 vertex_array_pushbuffer.Setup(vao);
750
751 // Upload vertex and index data.
697 SetupVertexBuffer(vao); 752 SetupVertexBuffer(vao);
753 SetupVertexInstances(vao);
754 const GLintptr index_buffer_offset = SetupIndexBuffer();
755
756 // Setup draw parameters. It will automatically choose what glDraw* method to use.
757 const DrawParameters params = SetupDraw(index_buffer_offset);
698 758
699 DrawParameters params = SetupDraw(); 759 // Prepare packed bindings.
760 bind_ubo_pushbuffer.Setup(0);
761 bind_ssbo_pushbuffer.Setup(0);
762
763 // Setup shaders and their used resources.
764 texture_cache.GuardSamplers(true);
700 SetupShaders(params.primitive_mode); 765 SetupShaders(params.primitive_mode);
766 texture_cache.GuardSamplers(false);
701 767
702 buffer_cache.Unmap(); 768 ConfigureFramebuffers(state);
769
770 // Signal the buffer cache that we are not going to upload more things.
771 const bool invalidate = buffer_cache.Unmap();
772
773 // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
774 vertex_array_pushbuffer.Bind();
775 bind_ubo_pushbuffer.Bind();
776 bind_ssbo_pushbuffer.Bind();
777
778 if (invalidate) {
779 // As all cached buffers are invalidated, we need to recheck their state.
780 gpu.dirty.ResetVertexArrays();
781 }
703 782
704 shader_program_manager->ApplyTo(state); 783 shader_program_manager->ApplyTo(state);
705 state.Apply(); 784 state.Apply();
706 785
707 res_cache.SignalPreDrawCall(); 786 if (texture_cache.TextureBarrier()) {
787 glTextureBarrier();
788 }
789
708 params.DispatchDraw(); 790 params.DispatchDraw();
709 res_cache.SignalPostDrawCall();
710 791
711 accelerate_draw = AccelDraw::Disabled; 792 accelerate_draw = AccelDraw::Disabled;
793 gpu.dirty.memory_general = false;
794}
795
796void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
797 if (!GLAD_GL_ARB_compute_variable_group_size) {
798 LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
799 "lack of GL_ARB_compute_variable_group_size");
800 return;
801 }
802
803 auto kernel = shader_cache.GetComputeKernel(code_addr);
804 ProgramVariant variant;
805 variant.texture_buffer_usage = SetupComputeTextures(kernel);
806 SetupComputeImages(kernel);
807
808 const auto [program, next_bindings] = kernel->GetProgramHandle(variant);
809 state.draw.shader_program = program;
810 state.draw.program_pipeline = 0;
811
812 const std::size_t buffer_size =
813 Tegra::Engines::KeplerCompute::NumConstBuffers *
814 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
815 buffer_cache.Map(buffer_size);
816
817 bind_ubo_pushbuffer.Setup(0);
818 bind_ssbo_pushbuffer.Setup(0);
819
820 SetupComputeConstBuffers(kernel);
821 SetupComputeGlobalMemory(kernel);
822
823 buffer_cache.Unmap();
824
825 bind_ubo_pushbuffer.Bind();
826 bind_ssbo_pushbuffer.Bind();
827
828 state.ApplyTextures();
829 state.ApplyImages();
830 state.ApplyShaderProgram();
831 state.ApplyProgramPipeline();
832
833 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
834 glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
835 launch_desc.grid_dim_z, launch_desc.block_dim_x,
836 launch_desc.block_dim_y, launch_desc.block_dim_z);
712} 837}
713 838
714void RasterizerOpenGL::FlushAll() {} 839void RasterizerOpenGL::FlushAll() {}
@@ -718,8 +843,8 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
718 if (!addr || !size) { 843 if (!addr || !size) {
719 return; 844 return;
720 } 845 }
721 res_cache.FlushRegion(addr, size); 846 texture_cache.FlushRegion(addr, size);
722 global_cache.FlushRegion(addr, size); 847 buffer_cache.FlushRegion(addr, size);
723} 848}
724 849
725void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { 850void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -727,23 +852,31 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
727 if (!addr || !size) { 852 if (!addr || !size) {
728 return; 853 return;
729 } 854 }
730 res_cache.InvalidateRegion(addr, size); 855 texture_cache.InvalidateRegion(addr, size);
731 shader_cache.InvalidateRegion(addr, size); 856 shader_cache.InvalidateRegion(addr, size);
732 global_cache.InvalidateRegion(addr, size);
733 buffer_cache.InvalidateRegion(addr, size); 857 buffer_cache.InvalidateRegion(addr, size);
734} 858}
735 859
736void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 860void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
737 FlushRegion(addr, size); 861 if (Settings::values.use_accurate_gpu_emulation) {
862 FlushRegion(addr, size);
863 }
738 InvalidateRegion(addr, size); 864 InvalidateRegion(addr, size);
739} 865}
740 866
867void RasterizerOpenGL::FlushCommands() {
868 glFlush();
869}
870
871void RasterizerOpenGL::TickFrame() {
872 buffer_cache.TickFrame();
873}
874
741bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 875bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
742 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 876 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
743 const Common::Rectangle<u32>& src_rect, 877 const Tegra::Engines::Fermi2D::Config& copy_config) {
744 const Common::Rectangle<u32>& dst_rect) {
745 MICROPROFILE_SCOPE(OpenGL_Blits); 878 MICROPROFILE_SCOPE(OpenGL_Blits);
746 res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); 879 texture_cache.DoFermiCopy(src, dst, copy_config);
747 return true; 880 return true;
748} 881}
749 882
@@ -755,7 +888,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
755 888
756 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 889 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
757 890
758 const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))}; 891 const auto surface{
892 texture_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
759 if (!surface) { 893 if (!surface) {
760 return {}; 894 return {};
761 } 895 }
@@ -771,7 +905,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
771 LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); 905 LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
772 } 906 }
773 907
774 screen_info.display_texture = surface->Texture().handle; 908 screen_info.display_texture = surface->GetTexture();
775 909
776 return true; 910 return true;
777} 911}
@@ -779,14 +913,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
779void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 913void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
780 const Shader& shader) { 914 const Shader& shader) {
781 MICROPROFILE_SCOPE(OpenGL_UBO); 915 MICROPROFILE_SCOPE(OpenGL_UBO);
782 const auto stage_index = static_cast<std::size_t>(stage); 916 const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
783 const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; 917 const auto& shader_stage = stages[static_cast<std::size_t>(stage)];
784 const auto& entries = shader->GetShaderEntries().const_buffers; 918 for (const auto& entry : shader->GetShaderEntries().const_buffers) {
919 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
920 SetupConstBuffer(buffer, entry);
921 }
922}
785 923
786 // Upload only the enabled buffers from the 16 constbuffers of each shader stage 924void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
787 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 925 MICROPROFILE_SCOPE(OpenGL_UBO);
788 const auto& entry = entries[bindpoint]; 926 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
789 SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); 927 for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
928 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
929 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
930 Tegra::Engines::ConstBufferInfo buffer;
931 buffer.address = config.Address();
932 buffer.size = config.size;
933 buffer.enabled = mask[entry.GetIndex()];
934 SetupConstBuffer(buffer, entry);
790 } 935 }
791} 936}
792 937
@@ -794,84 +939,169 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
794 const GLShader::ConstBufferEntry& entry) { 939 const GLShader::ConstBufferEntry& entry) {
795 if (!buffer.enabled) { 940 if (!buffer.enabled) {
796 // Set values to zero to unbind buffers 941 // Set values to zero to unbind buffers
797 bind_ubo_pushbuffer.Push(0, 0, 0); 942 bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
798 return; 943 return;
799 } 944 }
800 945
801 std::size_t size;
802 if (entry.IsIndirect()) {
803 // Buffer is accessed indirectly, so upload the entire thing
804 size = buffer.size;
805
806 if (size > MaxConstbufferSize) {
807 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
808 MaxConstbufferSize);
809 size = MaxConstbufferSize;
810 }
811 } else {
812 // Buffer is accessed directly, upload just what we use
813 size = entry.GetSize();
814 }
815
816 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 946 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
817 // UBO alignment requirements. 947 // UBO alignment requirements.
818 size = Common::AlignUp(size, sizeof(GLvec4)); 948 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
819 ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
820 949
821 const std::size_t alignment = device.GetUniformBufferAlignment(); 950 const auto alignment = device.GetUniformBufferAlignment();
822 const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); 951 const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
823 bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); 952 bind_ubo_pushbuffer.Push(cbuf, offset, size);
824} 953}
825 954
826void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 955void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
827 const Shader& shader) { 956 const Shader& shader) {
828 const auto& entries = shader->GetShaderEntries().global_memory_entries; 957 auto& gpu{system.GPU()};
829 for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 958 auto& memory_manager{gpu.MemoryManager()};
830 const auto& entry{entries[bindpoint]}; 959 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
831 const auto& region{global_cache.GetGlobalRegion(entry, stage)}; 960 for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
832 if (entry.IsWritten()) { 961 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
833 region->MarkAsModified(true, global_cache); 962 const auto gpu_addr{memory_manager.Read<u64>(addr)};
834 } 963 const auto size{memory_manager.Read<u32>(addr + 8)};
835 bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, 964 SetupGlobalMemory(entry, gpu_addr, size);
836 static_cast<GLsizeiptr>(region->GetSizeInBytes())); 965 }
966}
967
968void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
969 auto& gpu{system.GPU()};
970 auto& memory_manager{gpu.MemoryManager()};
971 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
972 for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
973 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
974 const auto gpu_addr{memory_manager.Read<u64>(addr)};
975 const auto size{memory_manager.Read<u32>(addr + 8)};
976 SetupGlobalMemory(entry, gpu_addr, size);
837 } 977 }
838} 978}
839 979
840void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, 980void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry,
841 BaseBindings base_bindings) { 981 GPUVAddr gpu_addr, std::size_t size) {
982 const auto alignment{device.GetShaderStorageBufferAlignment()};
983 const auto [ssbo, buffer_offset] =
984 buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
985 bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
986}
987
988TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage,
989 const Shader& shader,
990 BaseBindings base_bindings) {
842 MICROPROFILE_SCOPE(OpenGL_Texture); 991 MICROPROFILE_SCOPE(OpenGL_Texture);
843 const auto& gpu = system.GPU(); 992 const auto& gpu = system.GPU();
844 const auto& maxwell3d = gpu.Maxwell3D(); 993 const auto& maxwell3d = gpu.Maxwell3D();
845 const auto& entries = shader->GetShaderEntries().samplers; 994 const auto& entries = shader->GetShaderEntries().samplers;
846 995
847 ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units), 996 ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures),
848 "Exceeded the number of active textures."); 997 "Exceeded the number of active textures.");
849 998
999 TextureBufferUsage texture_buffer_usage{0};
1000
850 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 1001 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
851 const auto& entry = entries[bindpoint]; 1002 const auto& entry = entries[bindpoint];
852 Tegra::Texture::FullTextureInfo texture; 1003 const auto texture = [&]() {
853 if (entry.IsBindless()) { 1004 if (!entry.IsBindless()) {
1005 return maxwell3d.GetStageTexture(stage, entry.GetOffset());
1006 }
854 const auto cbuf = entry.GetBindlessCBuf(); 1007 const auto cbuf = entry.GetBindlessCBuf();
855 Tegra::Texture::TextureHandle tex_handle; 1008 Tegra::Texture::TextureHandle tex_handle;
856 tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); 1009 tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second);
857 texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); 1010 return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
858 } else { 1011 }();
859 texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); 1012
1013 if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) {
1014 texture_buffer_usage.set(bindpoint);
860 } 1015 }
861 const u32 current_bindpoint = base_bindings.sampler + bindpoint; 1016 }
862 1017
863 state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); 1018 return texture_buffer_usage;
1019}
864 1020
865 if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { 1021TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
866 state.texture_units[current_bindpoint].texture = 1022 MICROPROFILE_SCOPE(OpenGL_Texture);
867 surface->Texture(entry.IsArray()).handle; 1023 const auto& compute = system.GPU().KeplerCompute();
868 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, 1024 const auto& entries = kernel->GetShaderEntries().samplers;
869 texture.tic.w_source); 1025
870 } else { 1026 ASSERT_MSG(entries.size() <= std::size(state.textures),
871 // Can occur when texture addr is null or its memory is unmapped/invalid 1027 "Exceeded the number of active textures.");
872 state.texture_units[current_bindpoint].texture = 0; 1028
1029 TextureBufferUsage texture_buffer_usage{0};
1030
1031 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
1032 const auto& entry = entries[bindpoint];
1033 const auto texture = [&]() {
1034 if (!entry.IsBindless()) {
1035 return compute.GetTexture(entry.GetOffset());
1036 }
1037 const auto cbuf = entry.GetBindlessCBuf();
1038 Tegra::Texture::TextureHandle tex_handle;
1039 tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
1040 return compute.GetTextureInfo(tex_handle, entry.GetOffset());
1041 }();
1042
1043 if (SetupTexture(bindpoint, texture, entry)) {
1044 texture_buffer_usage.set(bindpoint);
873 } 1045 }
874 } 1046 }
1047
1048 return texture_buffer_usage;
1049}
1050
1051bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
1052 const GLShader::SamplerEntry& entry) {
1053 state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
1054
1055 const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
1056 if (!view) {
1057 // Can occur when texture addr is null or its memory is unmapped/invalid
1058 state.textures[binding] = 0;
1059 return false;
1060 }
1061 state.textures[binding] = view->GetTexture();
1062
1063 if (view->GetSurfaceParams().IsBuffer()) {
1064 return true;
1065 }
1066
1067 // Apply swizzle to textures that are not buffers.
1068 view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
1069 texture.tic.w_source);
1070 return false;
1071}
1072
1073void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
1074 const auto& compute = system.GPU().KeplerCompute();
1075 const auto& entries = shader->GetShaderEntries().images;
1076 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
1077 const auto& entry = entries[bindpoint];
1078 const auto tic = [&]() {
1079 if (!entry.IsBindless()) {
1080 return compute.GetTexture(entry.GetOffset()).tic;
1081 }
1082 const auto cbuf = entry.GetBindlessCBuf();
1083 Tegra::Texture::TextureHandle tex_handle;
1084 tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
1085 return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
1086 }();
1087 SetupImage(bindpoint, tic, entry);
1088 }
1089}
1090
1091void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
1092 const GLShader::ImageEntry& entry) {
1093 const auto view = texture_cache.GetImageSurface(tic, entry);
1094 if (!view) {
1095 state.images[binding] = 0;
1096 return;
1097 }
1098 if (!tic.IsBuffer()) {
1099 view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1100 }
1101 if (entry.IsWritten()) {
1102 view->MarkAsModified(texture_cache.Tick());
1103 }
1104 state.images[binding] = view->GetTexture();
875} 1105}
876 1106
877void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { 1107void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
@@ -915,10 +1145,11 @@ void RasterizerOpenGL::SyncClipCoef() {
915} 1145}
916 1146
917void RasterizerOpenGL::SyncCullMode() { 1147void RasterizerOpenGL::SyncCullMode() {
918 const auto& regs = system.GPU().Maxwell3D().regs; 1148 auto& maxwell3d = system.GPU().Maxwell3D();
919 1149
920 state.cull.enabled = regs.cull.enabled != 0; 1150 const auto& regs = maxwell3d.regs;
921 1151
1152 state.cull.enabled = regs.cull.enabled != 0;
922 if (state.cull.enabled) { 1153 if (state.cull.enabled) {
923 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); 1154 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
924 state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); 1155 state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
@@ -951,15 +1182,23 @@ void RasterizerOpenGL::SyncDepthTestState() {
951 state.depth.test_enabled = regs.depth_test_enable != 0; 1182 state.depth.test_enabled = regs.depth_test_enable != 0;
952 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; 1183 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
953 1184
954 if (!state.depth.test_enabled) 1185 if (!state.depth.test_enabled) {
955 return; 1186 return;
1187 }
956 1188
957 state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); 1189 state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
958} 1190}
959 1191
960void RasterizerOpenGL::SyncStencilTestState() { 1192void RasterizerOpenGL::SyncStencilTestState() {
961 const auto& regs = system.GPU().Maxwell3D().regs; 1193 auto& maxwell3d = system.GPU().Maxwell3D();
1194 if (!maxwell3d.dirty.stencil_test) {
1195 return;
1196 }
1197 maxwell3d.dirty.stencil_test = false;
1198
1199 const auto& regs = maxwell3d.regs;
962 state.stencil.test_enabled = regs.stencil_enable != 0; 1200 state.stencil.test_enabled = regs.stencil_enable != 0;
1201 state.MarkDirtyStencilState();
963 1202
964 if (!regs.stencil_enable) { 1203 if (!regs.stencil_enable) {
965 return; 1204 return;
@@ -992,7 +1231,12 @@ void RasterizerOpenGL::SyncStencilTestState() {
992} 1231}
993 1232
994void RasterizerOpenGL::SyncColorMask() { 1233void RasterizerOpenGL::SyncColorMask() {
995 const auto& regs = system.GPU().Maxwell3D().regs; 1234 auto& maxwell3d = system.GPU().Maxwell3D();
1235 if (!maxwell3d.dirty.color_mask) {
1236 return;
1237 }
1238 const auto& regs = maxwell3d.regs;
1239
996 const std::size_t count = 1240 const std::size_t count =
997 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; 1241 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
998 for (std::size_t i = 0; i < count; i++) { 1242 for (std::size_t i = 0; i < count; i++) {
@@ -1003,6 +1247,9 @@ void RasterizerOpenGL::SyncColorMask() {
1003 dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; 1247 dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
1004 dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; 1248 dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
1005 } 1249 }
1250
1251 state.MarkDirtyColorMask();
1252 maxwell3d.dirty.color_mask = false;
1006} 1253}
1007 1254
1008void RasterizerOpenGL::SyncMultiSampleState() { 1255void RasterizerOpenGL::SyncMultiSampleState() {
@@ -1017,7 +1264,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
1017} 1264}
1018 1265
1019void RasterizerOpenGL::SyncBlendState() { 1266void RasterizerOpenGL::SyncBlendState() {
1020 const auto& regs = system.GPU().Maxwell3D().regs; 1267 auto& maxwell3d = system.GPU().Maxwell3D();
1268 if (!maxwell3d.dirty.blend_state) {
1269 return;
1270 }
1271 const auto& regs = maxwell3d.regs;
1021 1272
1022 state.blend_color.red = regs.blend_color.r; 1273 state.blend_color.red = regs.blend_color.r;
1023 state.blend_color.green = regs.blend_color.g; 1274 state.blend_color.green = regs.blend_color.g;
@@ -1040,6 +1291,8 @@ void RasterizerOpenGL::SyncBlendState() {
1040 for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 1291 for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
1041 state.blend[i].enabled = false; 1292 state.blend[i].enabled = false;
1042 } 1293 }
1294 maxwell3d.dirty.blend_state = false;
1295 state.MarkDirtyBlendState();
1043 return; 1296 return;
1044 } 1297 }
1045 1298
@@ -1056,6 +1309,9 @@ void RasterizerOpenGL::SyncBlendState() {
1056 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); 1309 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
1057 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); 1310 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
1058 } 1311 }
1312
1313 state.MarkDirtyBlendState();
1314 maxwell3d.dirty.blend_state = false;
1059} 1315}
1060 1316
1061void RasterizerOpenGL::SyncLogicOpState() { 1317void RasterizerOpenGL::SyncLogicOpState() {
@@ -1107,13 +1363,21 @@ void RasterizerOpenGL::SyncPointState() {
1107} 1363}
1108 1364
1109void RasterizerOpenGL::SyncPolygonOffset() { 1365void RasterizerOpenGL::SyncPolygonOffset() {
1110 const auto& regs = system.GPU().Maxwell3D().regs; 1366 auto& maxwell3d = system.GPU().Maxwell3D();
1367 if (!maxwell3d.dirty.polygon_offset) {
1368 return;
1369 }
1370 const auto& regs = maxwell3d.regs;
1371
1111 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; 1372 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1112 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; 1373 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1113 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; 1374 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
1114 state.polygon_offset.units = regs.polygon_offset_units; 1375 state.polygon_offset.units = regs.polygon_offset_units;
1115 state.polygon_offset.factor = regs.polygon_offset_factor; 1376 state.polygon_offset.factor = regs.polygon_offset_factor;
1116 state.polygon_offset.clamp = regs.polygon_offset_clamp; 1377 state.polygon_offset.clamp = regs.polygon_offset_clamp;
1378
1379 state.MarkDirtyPolygonOffset();
1380 maxwell3d.dirty.polygon_offset = false;
1117} 1381}
1118 1382
1119void RasterizerOpenGL::SyncAlphaTest() { 1383void RasterizerOpenGL::SyncAlphaTest() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f7671ff5d..eada752e0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -23,15 +23,16 @@
23#include "video_core/rasterizer_interface.h" 23#include "video_core/rasterizer_interface.h"
24#include "video_core/renderer_opengl/gl_buffer_cache.h" 24#include "video_core/renderer_opengl/gl_buffer_cache.h"
25#include "video_core/renderer_opengl/gl_device.h" 25#include "video_core/renderer_opengl/gl_device.h"
26#include "video_core/renderer_opengl/gl_global_cache.h" 26#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
27#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h" 27#include "video_core/renderer_opengl/gl_resource_manager.h"
29#include "video_core/renderer_opengl/gl_sampler_cache.h" 28#include "video_core/renderer_opengl/gl_sampler_cache.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 29#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_decompiler.h" 30#include "video_core/renderer_opengl/gl_shader_decompiler.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 31#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_state.h" 32#include "video_core/renderer_opengl/gl_state.h"
33#include "video_core/renderer_opengl/gl_texture_cache.h"
34#include "video_core/renderer_opengl/utils.h" 34#include "video_core/renderer_opengl/utils.h"
35#include "video_core/textures/texture.h"
35 36
36namespace Core { 37namespace Core {
37class System; 38class System;
@@ -41,11 +42,14 @@ namespace Core::Frontend {
41class EmuWindow; 42class EmuWindow;
42} 43}
43 44
45namespace Tegra {
46class MemoryManager;
47}
48
44namespace OpenGL { 49namespace OpenGL {
45 50
46struct ScreenInfo; 51struct ScreenInfo;
47struct DrawParameters; 52struct DrawParameters;
48struct FramebufferCacheKey;
49 53
50class RasterizerOpenGL : public VideoCore::RasterizerInterface { 54class RasterizerOpenGL : public VideoCore::RasterizerInterface {
51public: 55public:
@@ -55,14 +59,16 @@ public:
55 59
56 void DrawArrays() override; 60 void DrawArrays() override;
57 void Clear() override; 61 void Clear() override;
62 void DispatchCompute(GPUVAddr code_addr) override;
58 void FlushAll() override; 63 void FlushAll() override;
59 void FlushRegion(CacheAddr addr, u64 size) override; 64 void FlushRegion(CacheAddr addr, u64 size) override;
60 void InvalidateRegion(CacheAddr addr, u64 size) override; 65 void InvalidateRegion(CacheAddr addr, u64 size) override;
61 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 66 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
67 void FlushCommands() override;
68 void TickFrame() override;
62 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 69 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
63 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 70 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
64 const Common::Rectangle<u32>& src_rect, 71 const Tegra::Engines::Fermi2D::Config& copy_config) override;
65 const Common::Rectangle<u32>& dst_rect) override;
66 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 72 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
67 u32 pixel_stride) override; 73 u32 pixel_stride) override;
68 bool AccelerateDrawBatch(bool is_indexed) override; 74 bool AccelerateDrawBatch(bool is_indexed) override;
@@ -70,11 +76,6 @@ public:
70 void LoadDiskResources(const std::atomic_bool& stop_loading, 76 void LoadDiskResources(const std::atomic_bool& stop_loading,
71 const VideoCore::DiskResourceLoadCallback& callback) override; 77 const VideoCore::DiskResourceLoadCallback& callback) override;
72 78
73 /// Maximum supported size that a constbuffer can have in bytes.
74 static constexpr std::size_t MaxConstbufferSize = 0x10000;
75 static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
76 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
77
78private: 79private:
79 struct FramebufferConfigState { 80 struct FramebufferConfigState {
80 bool using_color_fb{}; 81 bool using_color_fb{};
@@ -95,32 +96,64 @@ private:
95 96
96 /** 97 /**
97 * Configures the color and depth framebuffer states. 98 * Configures the color and depth framebuffer states.
98 * @param use_color_fb If true, configure color framebuffers. 99 *
99 * @param using_depth_fb If true, configure the depth/stencil framebuffer. 100 * @param current_state The current OpenGL state.
100 * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. 101 * @param using_color_fb If true, configure color framebuffers.
102 * @param using_depth_fb If true, configure the depth/stencil framebuffer.
103 * @param preserve_contents If true, tries to preserve data from a previously used
104 * framebuffer.
101 * @param single_color_target Specifies if a single color buffer target should be used. 105 * @param single_color_target Specifies if a single color buffer target should be used.
106 *
102 * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture 107 * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture
103 * (requires using_depth_fb to be true) 108 * (requires using_depth_fb to be true)
104 */ 109 */
105 std::pair<bool, bool> ConfigureFramebuffers( 110 std::pair<bool, bool> ConfigureFramebuffers(
106 OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, 111 OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
107 bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); 112 bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
108 113
114 void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
115 bool using_depth_fb, bool using_stencil_fb);
116
109 /// Configures the current constbuffers to use for the draw command. 117 /// Configures the current constbuffers to use for the draw command.
110 void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 118 void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
111 const Shader& shader); 119 const Shader& shader);
112 120
121 /// Configures the current constbuffers to use for the kernel invocation.
122 void SetupComputeConstBuffers(const Shader& kernel);
123
113 /// Configures a constant buffer. 124 /// Configures a constant buffer.
114 void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, 125 void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
115 const GLShader::ConstBufferEntry& entry); 126 const GLShader::ConstBufferEntry& entry);
116 127
117 /// Configures the current global memory entries to use for the draw command. 128 /// Configures the current global memory entries to use for the draw command.
118 void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 129 void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
119 const Shader& shader); 130 const Shader& shader);
131
132 /// Configures the current global memory entries to use for the kernel invocation.
133 void SetupComputeGlobalMemory(const Shader& kernel);
134
135 /// Configures a constant buffer.
136 void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
137 std::size_t size);
138
139 /// Configures the current textures to use for the draw command. Returns shaders texture buffer
140 /// usage.
141 TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
142 const Shader& shader, BaseBindings base_bindings);
143
144 /// Configures the textures used in a compute shader. Returns texture buffer usage.
145 TextureBufferUsage SetupComputeTextures(const Shader& kernel);
146
147 /// Configures a texture. Returns true when the texture is a texture buffer.
148 bool SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
149 const GLShader::SamplerEntry& entry);
120 150
121 /// Configures the current textures to use for the draw command. 151 /// Configures images in a compute shader.
122 void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, 152 void SetupComputeImages(const Shader& shader);
123 BaseBindings base_bindings); 153
154 /// Configures an image.
155 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
156 const GLShader::ImageEntry& entry);
124 157
125 /// Syncs the viewport and depth range to match the guest state 158 /// Syncs the viewport and depth range to match the guest state
126 void SyncViewport(OpenGLState& current_state); 159 void SyncViewport(OpenGLState& current_state);
@@ -181,10 +214,10 @@ private:
181 const Device device; 214 const Device device;
182 OpenGLState state; 215 OpenGLState state;
183 216
184 RasterizerCacheOpenGL res_cache; 217 TextureCacheOpenGL texture_cache;
185 ShaderCacheOpenGL shader_cache; 218 ShaderCacheOpenGL shader_cache;
186 GlobalRegionCacheOpenGL global_cache;
187 SamplerCacheOpenGL sampler_cache; 219 SamplerCacheOpenGL sampler_cache;
220 FramebufferCacheOpenGL framebuffer_cache;
188 221
189 Core::System& system; 222 Core::System& system;
190 ScreenInfo& screen_info; 223 ScreenInfo& screen_info;
@@ -195,13 +228,13 @@ private:
195 OGLVertexArray> 228 OGLVertexArray>
196 vertex_array_cache; 229 vertex_array_cache;
197 230
198 std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
199 FramebufferConfigState current_framebuffer_config_state; 231 FramebufferConfigState current_framebuffer_config_state;
200 std::pair<bool, bool> current_depth_stencil_usage{}; 232 std::pair<bool, bool> current_depth_stencil_usage{};
201 233
202 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 234 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
203 OGLBufferCache buffer_cache; 235 OGLBufferCache buffer_cache;
204 236
237 VertexArrayPushBuffer vertex_array_pushbuffer;
205 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; 238 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
206 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; 239 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
207 240
@@ -213,16 +246,19 @@ private:
213 GLuint SetupVertexFormat(); 246 GLuint SetupVertexFormat();
214 247
215 void SetupVertexBuffer(GLuint vao); 248 void SetupVertexBuffer(GLuint vao);
249 void SetupVertexInstances(GLuint vao);
216 250
217 DrawParameters SetupDraw(); 251 GLintptr SetupIndexBuffer();
218 252
219 void SetupShaders(GLenum primitive_mode); 253 DrawParameters SetupDraw(GLintptr index_buffer_offset);
220 254
221 void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state); 255 void SetupShaders(GLenum primitive_mode);
222 256
223 enum class AccelDraw { Disabled, Arrays, Indexed }; 257 enum class AccelDraw { Disabled, Arrays, Indexed };
224 AccelDraw accelerate_draw = AccelDraw::Disabled; 258 AccelDraw accelerate_draw = AccelDraw::Disabled;
225 259
260 OGLFramebuffer clear_framebuffer;
261
226 using CachedPageMap = boost::icl::interval_map<u64, int>; 262 using CachedPageMap = boost::icl::interval_map<u64, int>;
227 CachedPageMap cached_pages; 263 CachedPageMap cached_pages;
228}; 264};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
deleted file mode 100644
index a7681902e..000000000
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ /dev/null
@@ -1,1362 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include <glad/glad.h>
8
9#include "common/alignment.h"
10#include "common/assert.h"
11#include "common/logging/log.h"
12#include "common/microprofile.h"
13#include "common/scope_exit.h"
14#include "core/core.h"
15#include "core/hle/kernel/process.h"
16#include "core/settings.h"
17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/memory_manager.h"
19#include "video_core/morton.h"
20#include "video_core/renderer_opengl/gl_rasterizer.h"
21#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
22#include "video_core/renderer_opengl/utils.h"
23#include "video_core/surface.h"
24#include "video_core/textures/convert.h"
25#include "video_core/textures/decoders.h"
26
27namespace OpenGL {
28
29using VideoCore::MortonSwizzle;
30using VideoCore::MortonSwizzleMode;
31using VideoCore::Surface::ComponentTypeFromDepthFormat;
32using VideoCore::Surface::ComponentTypeFromRenderTarget;
33using VideoCore::Surface::ComponentTypeFromTexture;
34using VideoCore::Surface::PixelFormatFromDepthFormat;
35using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
36using VideoCore::Surface::PixelFormatFromTextureFormat;
37using VideoCore::Surface::SurfaceTargetFromTextureType;
38
39struct FormatTuple {
40 GLint internal_format;
41 GLenum format;
42 GLenum type;
43 ComponentType component_type;
44 bool compressed;
45};
46
47static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
48 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
49 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
50 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
51 glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
52 glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
53 if (max_mip_level == 1) {
54 glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0);
55 }
56}
57
58void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
59 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
60
61 gpu_addr = gpu_addr_;
62 host_ptr = memory_manager.GetPointer(gpu_addr_);
63 size_in_bytes = SizeInBytesRaw();
64
65 if (IsPixelFormatASTC(pixel_format)) {
66 // ASTC is uncompressed in software, in emulated as RGBA8
67 size_in_bytes_gl = width * height * depth * 4;
68 } else {
69 size_in_bytes_gl = SizeInBytesGL();
70 }
71}
72
73std::size_t SurfaceParams::InnerMipmapMemorySize(u32 mip_level, bool force_gl, bool layer_only,
74 bool uncompressed) const {
75 const u32 tile_x{GetDefaultBlockWidth(pixel_format)};
76 const u32 tile_y{GetDefaultBlockHeight(pixel_format)};
77 const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
78 u32 m_depth = (layer_only ? 1U : depth);
79 u32 m_width = MipWidth(mip_level);
80 u32 m_height = MipHeight(mip_level);
81 m_width = uncompressed ? m_width : std::max(1U, (m_width + tile_x - 1) / tile_x);
82 m_height = uncompressed ? m_height : std::max(1U, (m_height + tile_y - 1) / tile_y);
83 m_depth = std::max(1U, m_depth >> mip_level);
84 u32 m_block_height = MipBlockHeight(mip_level);
85 u32 m_block_depth = MipBlockDepth(mip_level);
86 return Tegra::Texture::CalculateSize(force_gl ? false : is_tiled, bytes_per_pixel, m_width,
87 m_height, m_depth, m_block_height, m_block_depth);
88}
89
90std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
91 bool uncompressed) const {
92 std::size_t block_size_bytes = Tegra::Texture::GetGOBSize() * block_height * block_depth;
93 std::size_t size = 0;
94 for (u32 i = 0; i < max_mip_level; i++) {
95 size += InnerMipmapMemorySize(i, force_gl, layer_only, uncompressed);
96 }
97 if (!force_gl && is_tiled) {
98 size = Common::AlignUp(size, block_size_bytes);
99 }
100 return size;
101}
102
103/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
104 const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {
105 SurfaceParams params{};
106 params.is_tiled = config.tic.IsTiled();
107 params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
108 params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
109 params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
110 params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
111 params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
112 params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
113 params.srgb_conversion);
114
115 if (config.tsc.depth_compare_enabled) {
116 // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled,
117 // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also
118 // causes GetFormatType to properly return 'Depth' below).
119 if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) {
120 switch (params.pixel_format) {
121 case PixelFormat::R16S:
122 case PixelFormat::R16U:
123 case PixelFormat::R16F:
124 params.pixel_format = PixelFormat::Z16;
125 break;
126 case PixelFormat::R32F:
127 params.pixel_format = PixelFormat::Z32F;
128 break;
129 default:
130 LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}",
131 static_cast<u32>(params.pixel_format));
132 break;
133 }
134 }
135 }
136
137 params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
138 params.type = GetFormatType(params.pixel_format);
139 UNIMPLEMENTED_IF(params.type == SurfaceType::ColorTexture && config.tsc.depth_compare_enabled);
140
141 params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
142 params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
143 if (!params.is_tiled) {
144 params.pitch = config.tic.Pitch();
145 }
146 params.unaligned_height = config.tic.Height();
147 params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
148 params.identity = SurfaceClass::Uploaded;
149
150 switch (params.target) {
151 case SurfaceTarget::Texture1D:
152 case SurfaceTarget::Texture2D:
153 params.depth = 1;
154 break;
155 case SurfaceTarget::TextureCubemap:
156 params.depth = config.tic.Depth() * 6;
157 break;
158 case SurfaceTarget::Texture3D:
159 params.depth = config.tic.Depth();
160 break;
161 case SurfaceTarget::Texture2DArray:
162 params.depth = config.tic.Depth();
163 if (!entry.IsArray()) {
164 // TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of
165 // one, but sample the texture in the shader as if it were not an array texture. This
166 // probably is valid on hardware, but we still need to write a test to confirm this. In
167 // emulation, the workaround here is to continue to treat this as a Texture2D. An
168 // example game that does this is Super Mario Odyssey (in Cloud Kingdom).
169 ASSERT(params.depth == 1);
170 params.target = SurfaceTarget::Texture2D;
171 }
172 break;
173 case SurfaceTarget::TextureCubeArray:
174 params.depth = config.tic.Depth() * 6;
175 if (!entry.IsArray()) {
176 ASSERT(params.depth == 6);
177 params.target = SurfaceTarget::TextureCubemap;
178 }
179 break;
180 default:
181 LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
182 UNREACHABLE();
183 params.depth = 1;
184 break;
185 }
186
187 params.is_layered = SurfaceTargetIsLayered(params.target);
188 params.is_array = SurfaceTargetIsArray(params.target);
189 params.max_mip_level = config.tic.max_mip_level + 1;
190 params.rt = {};
191
192 params.InitCacheParameters(config.tic.Address());
193
194 return params;
195}
196
197/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) {
198 const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
199 SurfaceParams params{};
200
201 params.is_tiled =
202 config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
203 params.block_width = 1 << config.memory_layout.block_width;
204 params.block_height = 1 << config.memory_layout.block_height;
205 params.block_depth = 1 << config.memory_layout.block_depth;
206 params.tile_width_spacing = 1;
207 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
208 params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
209 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
210 params.component_type = ComponentTypeFromRenderTarget(config.format);
211 params.type = GetFormatType(params.pixel_format);
212 if (params.is_tiled) {
213 params.width = config.width;
214 } else {
215 params.pitch = config.width;
216 const u32 bpp = params.GetFormatBpp() / 8;
217 params.width = params.pitch / bpp;
218 }
219 params.height = config.height;
220 params.unaligned_height = config.height;
221 params.target = SurfaceTarget::Texture2D;
222 params.identity = SurfaceClass::RenderTarget;
223 params.depth = 1;
224 params.max_mip_level = 1;
225 params.is_layered = false;
226
227 // Render target specific parameters, not used for caching
228 params.rt.index = static_cast<u32>(index);
229 params.rt.array_mode = config.array_mode;
230 params.rt.layer_stride = config.layer_stride;
231 params.rt.volume = config.volume;
232 params.rt.base_layer = config.base_layer;
233
234 params.InitCacheParameters(config.Address());
235
236 return params;
237}
238
239/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
240 u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
241 u32 block_width, u32 block_height, u32 block_depth,
242 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
243 SurfaceParams params{};
244
245 params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
246 params.block_width = 1 << std::min(block_width, 5U);
247 params.block_height = 1 << std::min(block_height, 5U);
248 params.block_depth = 1 << std::min(block_depth, 5U);
249 params.tile_width_spacing = 1;
250 params.pixel_format = PixelFormatFromDepthFormat(format);
251 params.component_type = ComponentTypeFromDepthFormat(format);
252 params.type = GetFormatType(params.pixel_format);
253 params.srgb_conversion = false;
254 params.width = zeta_width;
255 params.height = zeta_height;
256 params.unaligned_height = zeta_height;
257 params.target = SurfaceTarget::Texture2D;
258 params.identity = SurfaceClass::DepthBuffer;
259 params.depth = 1;
260 params.max_mip_level = 1;
261 params.is_layered = false;
262 params.rt = {};
263
264 params.InitCacheParameters(zeta_address);
265
266 return params;
267}
268
269/*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
270 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
271 SurfaceParams params{};
272
273 params.is_tiled = !config.linear;
274 params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
275 params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
276 params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
277 params.tile_width_spacing = 1;
278 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
279 params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
280 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
281 params.component_type = ComponentTypeFromRenderTarget(config.format);
282 params.type = GetFormatType(params.pixel_format);
283 params.width = config.width;
284 params.pitch = config.pitch;
285 params.height = config.height;
286 params.unaligned_height = config.height;
287 params.target = SurfaceTarget::Texture2D;
288 params.identity = SurfaceClass::Copy;
289 params.depth = 1;
290 params.max_mip_level = 1;
291 params.rt = {};
292
293 params.InitCacheParameters(config.Address());
294
295 return params;
296}
297
298static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
299 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
300 {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
301 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
302 {GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
303 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
304 false}, // A2B10G10R10U
305 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
306 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U
307 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
308 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
309 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U
310 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
311 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
312 false}, // R11FG11FB10F
313 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
314 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
315 true}, // DXT1
316 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
317 true}, // DXT23
318 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
319 true}, // DXT45
320 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
321 {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
322 true}, // DXN2UNORM
323 {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
324 {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
325 true}, // BC7U
326 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
327 true}, // BC6H_UF16
328 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
329 true}, // BC6H_SF16
330 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
331 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
332 {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F
333 {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F
334 {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F
335 {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F
336 {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U
337 {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S
338 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI
339 {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I
340 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16
341 {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F
342 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI
343 {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I
344 {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
345 {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
346 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
347 false}, // RGBA8_SRGB
348 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
349 {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
350 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
351 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
352 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
353 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
354 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
355 {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
356 // Compressed sRGB formats
357 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
358 true}, // DXT1_SRGB
359 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
360 true}, // DXT23_SRGB
361 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
362 true}, // DXT45_SRGB
363 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
364 true}, // BC7U_SRGB
365 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
366 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
367 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
368 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
369 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
370 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
371 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
372 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
373
374 // Depth formats
375 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
376 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
377 false}, // Z16
378
379 // DepthStencil formats
380 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
381 false}, // Z24S8
382 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
383 false}, // S8Z24
384 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
385 ComponentType::Float, false}, // Z32FS8
386}};
387
388static GLenum SurfaceTargetToGL(SurfaceTarget target) {
389 switch (target) {
390 case SurfaceTarget::Texture1D:
391 return GL_TEXTURE_1D;
392 case SurfaceTarget::Texture2D:
393 return GL_TEXTURE_2D;
394 case SurfaceTarget::Texture3D:
395 return GL_TEXTURE_3D;
396 case SurfaceTarget::Texture1DArray:
397 return GL_TEXTURE_1D_ARRAY;
398 case SurfaceTarget::Texture2DArray:
399 return GL_TEXTURE_2D_ARRAY;
400 case SurfaceTarget::TextureCubemap:
401 return GL_TEXTURE_CUBE_MAP;
402 case SurfaceTarget::TextureCubeArray:
403 return GL_TEXTURE_CUBE_MAP_ARRAY;
404 }
405 LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target));
406 UNREACHABLE();
407 return {};
408}
409
410static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
411 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
412 auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
413 ASSERT(component_type == format.component_type);
414
415 return format;
416}
417
418/// Returns the discrepant array target
419constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
420 switch (target) {
421 case SurfaceTarget::Texture1D:
422 return GL_TEXTURE_1D_ARRAY;
423 case SurfaceTarget::Texture2D:
424 return GL_TEXTURE_2D_ARRAY;
425 case SurfaceTarget::Texture3D:
426 return GL_NONE;
427 case SurfaceTarget::Texture1DArray:
428 return GL_TEXTURE_1D;
429 case SurfaceTarget::Texture2DArray:
430 return GL_TEXTURE_2D;
431 case SurfaceTarget::TextureCubemap:
432 return GL_TEXTURE_CUBE_MAP_ARRAY;
433 case SurfaceTarget::TextureCubeArray:
434 return GL_TEXTURE_CUBE_MAP;
435 }
436 return GL_NONE;
437}
438
// Returns the texel rectangle covered by the given mip level.
// Note the encoding: {left=0, top=actual_height, right=mip width, bottom=0}.
Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
    // Mip dimensions halve per level but never drop below one texel.
    u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
    if (IsPixelFormatASTC(pixel_format)) {
        // ASTC formats must stop at the ASTC block size boundary
        actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second);
    }
    return {0, actual_height, MipWidth(mip_level), 0};
}
447
448void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
449 std::vector<u8>& gl_buffer, u32 mip_level) {
450 u32 depth = params.MipDepth(mip_level);
451 if (params.target == SurfaceTarget::Texture2D) {
452 // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
453 depth = 1U;
454 }
455 if (params.is_layered) {
456 u64 offset = params.GetMipmapLevelOffset(mip_level);
457 u64 offset_gl = 0;
458 const u64 layer_size = params.LayerMemorySize();
459 const u64 gl_size = params.LayerSizeGL(mip_level);
460 for (u32 i = 0; i < params.depth; i++) {
461 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
462 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
463 params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
464 gl_buffer.data() + offset_gl, params.host_ptr + offset);
465 offset += layer_size;
466 offset_gl += gl_size;
467 }
468 } else {
469 const u64 offset = params.GetMipmapLevelOffset(mip_level);
470 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
471 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
472 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
473 gl_buffer.data(), params.host_ptr + offset);
474 }
475}
476
477void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface,
478 const Surface& dst_surface) {
479 const auto& src_params{src_surface->GetSurfaceParams()};
480 const auto& dst_params{dst_surface->GetSurfaceParams()};
481
482 const u32 width{std::min(src_params.width, dst_params.width)};
483 const u32 height{std::min(src_params.height, dst_params.height)};
484
485 glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0,
486 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0,
487 0, 0, width, height, 1);
488
489 dst_surface->MarkAsModified(true, *this);
490}
491
492MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
493void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
494 const GLuint copy_pbo_handle, const GLenum src_attachment,
495 const GLenum dst_attachment,
496 const std::size_t cubemap_face) {
497 MICROPROFILE_SCOPE(OpenGL_CopySurface);
498 ASSERT_MSG(dst_attachment == 0, "Unimplemented");
499
500 const auto& src_params{src_surface->GetSurfaceParams()};
501 const auto& dst_params{dst_surface->GetSurfaceParams()};
502
503 const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
504 const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
505
506 const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);
507
508 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
509 glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_COPY);
510 if (source_format.compressed) {
511 glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
512 static_cast<GLsizei>(src_params.size_in_bytes), nullptr);
513 } else {
514 glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format,
515 source_format.type, static_cast<GLsizei>(src_params.size_in_bytes),
516 nullptr);
517 }
518 // If the new texture is bigger than the previous one, we need to fill in the rest with data
519 // from the CPU.
520 if (src_params.size_in_bytes < dst_params.size_in_bytes) {
521 // Upload the rest of the memory.
522 if (dst_params.is_tiled) {
523 // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
524 // of the data in this case. Games like Super Mario Odyssey seem to hit this case
525 // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
526 // but it doesn't clear it beforehand, the texture is already full of zeros.
527 LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
528 "reinterpretation but the texture is tiled.");
529 }
530 const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
531 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
532 glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
533 memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
534 }
535
536 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
537
538 const GLsizei width{static_cast<GLsizei>(
539 std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))};
540 const GLsizei height{static_cast<GLsizei>(
541 std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))};
542
543 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
544 if (dest_format.compressed) {
545 LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
546 UNREACHABLE();
547 } else {
548 switch (dst_params.target) {
549 case SurfaceTarget::Texture1D:
550 glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format,
551 dest_format.type, nullptr);
552 break;
553 case SurfaceTarget::Texture2D:
554 glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height,
555 dest_format.format, dest_format.type, nullptr);
556 break;
557 case SurfaceTarget::Texture3D:
558 case SurfaceTarget::Texture2DArray:
559 case SurfaceTarget::TextureCubeArray:
560 glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height,
561 static_cast<GLsizei>(dst_params.depth), dest_format.format,
562 dest_format.type, nullptr);
563 break;
564 case SurfaceTarget::TextureCubemap:
565 glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0,
566 static_cast<GLint>(cubemap_face), width, height, 1,
567 dest_format.format, dest_format.type, nullptr);
568 break;
569 default:
570 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
571 static_cast<u32>(dst_params.target));
572 UNREACHABLE();
573 }
574 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
575 }
576
577 dst_surface->MarkAsModified(true, *this);
578}
579
// Creates a cached surface: resolves its CPU backing address and allocates
// immutable OpenGL texture storage matching the surface's target and dimensions.
CachedSurface::CachedSurface(const SurfaceParams& params)
    : RasterizerCacheObject{params.host_ptr}, params{params},
      gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} {

    // The GPU address must be mapped; an unmapped surface is a caller bug.
    const auto optional_cpu_addr{
        Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
    ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
    cpu_addr = *optional_cpu_addr;

    texture.Create(gl_target);

    // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
    // alternatives. This signals a bug on those functions.
    const auto width = static_cast<GLsizei>(params.MipWidth(0));
    const auto height = static_cast<GLsizei>(params.MipHeight(0));
    memory_size = params.MemorySize();
    reinterpreted = false;

    const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
    gl_internal_format = format_tuple.internal_format;

    // Allocate immutable storage sized for the full mip chain.
    switch (params.target) {
    case SurfaceTarget::Texture1D:
        glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
                           width);
        break;
    case SurfaceTarget::Texture2D:
    case SurfaceTarget::TextureCubemap:
        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
                           width, height);
        break;
    case SurfaceTarget::Texture3D:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
        // depth doubles as layer count for array/cube-array targets.
        glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
                           width, height, params.depth);
        break;
    default:
        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                     static_cast<u32>(params.target));
        UNREACHABLE();
        // Fall back to 2D storage so the texture handle remains usable.
        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
                           width, height);
    }

    ApplyTextureDefaults(texture.handle, params.max_mip_level);

    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
}
629
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
// Reads the surface's guest memory into the temporary per-mipmap buffers:
// de-swizzles tiled data, de-pitches linear data, and converts formats the host
// cannot sample directly (e.g. ASTC -> RGBA8).
void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
    MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
    // Ensure one scratch buffer per mipmap level, each sized for that level.
    if (gl_buffer.size() < params.max_mip_level)
        gl_buffer.resize(params.max_mip_level);
    for (u32 i = 0; i < params.max_mip_level; i++)
        gl_buffer[i].resize(params.GetMipmapSizeGL(i));
    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
                   params.block_width, static_cast<u32>(params.target));
        // De-swizzle every mipmap level from the guest's Morton layout.
        for (u32 i = 0; i < params.max_mip_level; i++)
            SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
    } else {
        // Linear (pitch) surface: only the base level is populated.
        const u32 bpp = params.GetFormatBpp() / 8;
        const u32 copy_size = (params.width * bpp + GetDefaultBlockWidth(params.pixel_format) - 1) /
                              GetDefaultBlockWidth(params.pixel_format);
        if (params.pitch == copy_size) {
            // Rows are contiguous; one bulk copy suffices.
            std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
        } else {
            // Rows are padded to the pitch; copy each row of compressed blocks separately.
            const u32 height = (params.height + GetDefaultBlockHeight(params.pixel_format) - 1) /
                               GetDefaultBlockHeight(params.pixel_format);
            const u8* start{params.host_ptr};
            u8* write_to = gl_buffer[0].data();
            for (u32 h = height; h > 0; h--) {
                std::memcpy(write_to, start, copy_size);
                start += params.pitch;
                write_to += copy_size;
            }
        }
    }
    // Convert each level into a host-consumable format in place.
    for (u32 i = 0; i < params.max_mip_level; i++) {
        const u32 width = params.MipWidth(i);
        const u32 height = params.MipHeight(i);
        const u32 depth = params.MipDepth(i);
        if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
            // Reserve size for RGBA8 conversion
            constexpr std::size_t rgba_bpp = 4;
            gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
        }
        Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
                                               height, depth, true, true);
    }
}
674
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
// Writes the surface's GL texture contents (base level only) back to guest memory,
// converting to the guest representation and re-swizzling tiled surfaces.
void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);

    // ASTC data is converted on load and cannot be converted back yet.
    ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");

    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
    // OpenGL temporary buffer needs to be big enough to store raw texture size
    gl_buffer[0].resize(GetSizeInBytes());

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
    // GL_PACK_ALIGNMENT only accepts 1, 2, 4 or 8; clamp the row alignment into range.
    const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
    glPixelStorei(GL_PACK_ALIGNMENT, align);
    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
    ASSERT(!tuple.compressed);
    // Make sure no PBO is bound so the read lands in client memory.
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
    glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
                      static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
    Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
                                           params.height, params.depth, true, true);
    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
                   params.block_width, static_cast<u32>(params.target));

        // Re-swizzle the linear data back into the guest's Morton layout.
        SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
    } else {
        // Linear surface: copy back row by row when the guest pitch exceeds the row size.
        const u32 bpp = params.GetFormatBpp() / 8;
        const u32 copy_size = params.width * bpp;
        if (params.pitch == copy_size) {
            std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
        } else {
            u8* start{params.host_ptr};
            const u8* read_to = gl_buffer[0].data();
            for (u32 h = params.height; h > 0; h--) {
                std::memcpy(start, read_to, copy_size);
                start += params.pitch;
                read_to += copy_size;
            }
        }
    }
}
717
// Uploads one mipmap level from the temporary buffer into the GL texture,
// dispatching on the surface target and on whether the format is compressed.
// read_fb_handle/draw_fb_handle are currently unused by this path.
void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
                                          GLuint read_fb_handle, GLuint draw_fb_handle) {
    const auto& rect{params.GetRect(mip_map)};

    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;

    // Load data from memory to the surface
    const auto x0 = static_cast<GLint>(rect.left);
    const auto y0 = static_cast<GLint>(rect.bottom);
    // Byte offset of (x0, y0) within the linear buffer for this level.
    auto buffer_offset =
        static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
                                 static_cast<std::size_t>(x0)) *
        GetBytesPerPixel(params.pixel_format);

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);

    // GL_UNPACK_ALIGNMENT only accepts 1, 2, 4 or 8; clamp the row alignment into range.
    const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
    glPixelStorei(GL_UNPACK_ALIGNMENT, align);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));

    const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
    if (tuple.compressed) {
        switch (params.target) {
        case SurfaceTarget::Texture2D:
            glCompressedTextureSubImage2D(
                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture3D:
            glCompressedTextureSubImage3D(
                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)),
                static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glCompressedTextureSubImage3D(
                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
                tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
            // Upload face by face, advancing through the buffer one layer at a time.
            const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
            for (std::size_t face = 0; face < params.depth; ++face) {
                glCompressedTextureSubImage3D(
                    texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
                    static_cast<GLsizei>(params.MipWidth(mip_map)),
                    static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
                    layer_size, &gl_buffer[mip_map][buffer_offset]);
                buffer_offset += layer_size;
            }
            break;
        }
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
            // Fall back to a 2D upload so some data still reaches the texture.
            glCompressedTextureSubImage2D(
                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
        }
    } else {
        switch (params.target) {
        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
                                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture3D:
            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
                                tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
            // Upload face by face, advancing through the buffer one layer at a time.
            for (std::size_t face = 0; face < params.depth; ++face) {
                glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
                                    static_cast<GLsizei>(rect.GetWidth()),
                                    static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
                                    tuple.type, &gl_buffer[mip_map][buffer_offset]);
                buffer_offset += params.LayerSizeGL(mip_map);
            }
            break;
        }
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
            // Fall back to a 2D upload so some data still reaches the texture.
            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
                                &gl_buffer[mip_map][buffer_offset]);
        }
    }

    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
830
831void CachedSurface::EnsureTextureDiscrepantView() {
832 if (discrepant_view.handle != 0)
833 return;
834
835 const GLenum target{GetArrayDiscrepantTarget(params.target)};
836 ASSERT(target != GL_NONE);
837
838 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
839 constexpr GLuint min_layer = 0;
840 constexpr GLuint min_level = 0;
841
842 glGenTextures(1, &discrepant_view.handle);
843 glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
844 params.max_mip_level, min_layer, num_layers);
845 ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
846 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
847 reinterpret_cast<const GLint*>(swizzle.data()));
848}
849
850MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
851void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem,
852 GLuint read_fb_handle, GLuint draw_fb_handle) {
853 MICROPROFILE_SCOPE(OpenGL_TextureUL);
854
855 for (u32 i = 0; i < params.max_mip_level; i++)
856 UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle);
857}
858
859void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
860 Tegra::Texture::SwizzleSource swizzle_y,
861 Tegra::Texture::SwizzleSource swizzle_z,
862 Tegra::Texture::SwizzleSource swizzle_w) {
863 const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x);
864 const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
865 const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);
866 const GLenum new_w = MaxwellToGL::SwizzleSource(swizzle_w);
867 if (swizzle[0] == new_x && swizzle[1] == new_y && swizzle[2] == new_z && swizzle[3] == new_w) {
868 return;
869 }
870 swizzle = {new_x, new_y, new_z, new_w};
871 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
872 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
873 if (discrepant_view.handle != 0) {
874 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
875 }
876}
877
// Creates the GL objects shared by all cache operations: the framebuffers used
// for blits and the PBO used for format-reinterpreting copies.
RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
    : RasterizerCache{rasterizer} {
    read_framebuffer.Create();
    draw_framebuffer.Create();
    copy_pbo.Create();
}
884
// Returns (creating or reusing as needed) the surface for a guest texture configuration.
Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
                                                 const GLShader::SamplerEntry& entry) {
    return GetSurface(SurfaceParams::CreateForTexture(config, entry));
}
889
890Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
891 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
892 const auto& regs{gpu.regs};
893
894 if (!gpu.dirty_flags.zeta_buffer) {
895 return last_depth_buffer;
896 }
897 gpu.dirty_flags.zeta_buffer = false;
898
899 if (!regs.zeta.Address() || !regs.zeta_enable) {
900 return last_depth_buffer = {};
901 }
902
903 SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
904 regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format,
905 regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
906 regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
907
908 return last_depth_buffer = GetSurface(depth_params, preserve_contents);
909}
910
911Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) {
912 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
913 const auto& regs{gpu.regs};
914
915 if (!gpu.dirty_flags.color_buffer[index]) {
916 return current_color_buffers[index];
917 }
918 gpu.dirty_flags.color_buffer.reset(index);
919
920 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
921
922 if (index >= regs.rt_control.count) {
923 return current_color_buffers[index] = {};
924 }
925
926 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
927 return current_color_buffers[index] = {};
928 }
929
930 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
931
932 return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
933}
934
// Synchronizes a surface with guest memory: reads and converts the guest data,
// uploads it to the GL texture, then clears the modified/reload state.
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
    surface->LoadGLBuffer(temporal_memory);
    surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle);
    surface->MarkAsModified(false, *this);
    surface->MarkForReload(false);
}
941
// Central surface lookup: returns a surface matching params, reusing a compatible
// cached surface, recreating an incompatible one (preserving its contents when
// requested), or creating a new one. Returns an empty surface for invalid params.
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
    if (!params.IsValid()) {
        return {};
    }

    // Look up surface in the cache based on address
    Surface surface{TryGet(params.host_ptr)};
    if (surface) {
        if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
            // Use the cached surface as-is unless it's not synced with memory
            if (surface->MustReload())
                LoadSurface(surface);
            return surface;
        } else if (preserve_contents) {
            // If surface parameters changed and we care about keeping the previous data, recreate
            // the surface from the old one
            Surface new_surface{RecreateSurface(surface, params)};
            Unregister(surface);
            Register(new_surface);
            if (new_surface->IsUploaded()) {
                RegisterReinterpretSurface(new_surface);
            }
            return new_surface;
        } else {
            // Delete the old surface before creating a new one to prevent collisions.
            Unregister(surface);
        }
    }

    // No cached surface found - get a new one
    surface = GetUncachedSurface(params);
    Register(surface);

    // Only load surface from memory if we care about the contents
    if (preserve_contents) {
        LoadSurface(surface);
    }

    return surface;
}
982
983Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) {
984 Surface surface{TryGetReservedSurface(params)};
985 if (!surface) {
986 // No reserved surface available, create a new one and reserve it
987 surface = std::make_shared<CachedSurface>(params);
988 ReserveSurface(surface);
989 }
990 return surface;
991}
992
// Builds a layered destination surface by copying, for every layer and mipmap,
// from whichever cached surface backs that layer's guest memory. Layers without
// a cached source are skipped.
void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
                                                   const Surface& dst_surface) {
    const auto& init_params{src_surface->GetSurfaceParams()};
    const auto& dst_params{dst_surface->GetSurfaceParams()};
    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
    GPUVAddr address{init_params.gpu_addr};
    const std::size_t layer_size{dst_params.LayerMemorySize()};
    for (u32 layer = 0; layer < dst_params.depth; layer++) {
        for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
            const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
            const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
            if (!copy) {
                // No cached surface backs this layer/mipmap; leave it untouched.
                continue;
            }
            const auto& src_params{copy->GetSurfaceParams()};
            // Copy only the region both surfaces cover.
            const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
            const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};

            glCopyImageSubData(copy->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0,
                               0, 0, dst_surface->Texture().handle,
                               SurfaceTargetToGL(dst_params.target), mipmap, 0, 0, layer, width,
                               height, 1);
        }
        // Advance to the next layer's guest memory.
        address += layer_size;
    }

    dst_surface->MarkAsModified(true, *this);
}
1021
// Blits src_rect of src_surface onto dst_rect of dst_surface using the given
// framebuffers, attaching the textures according to their surface type and
// target. Color blits use linear filtering; depth/stencil blits use nearest.
// Always returns true. The previous OpenGL state is restored on exit.
static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
                        const Common::Rectangle<u32>& src_rect,
                        const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
                        GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
                        std::size_t cubemap_face = 0) {

    const auto& src_params{src_surface->GetSurfaceParams()};
    const auto& dst_params{dst_surface->GetSurfaceParams()};

    // Save the current state and restore it when this function returns.
    OpenGLState prev_state{OpenGLState::GetCurState()};
    SCOPE_EXIT({ prev_state.Apply(); });

    OpenGLState state;
    state.draw.read_framebuffer = read_fb_handle;
    state.draw.draw_framebuffer = draw_fb_handle;
    state.Apply();

    // Bitmask passed to glBlitFramebuffer, selected by the surface type below.
    u32 buffers{};

    if (src_params.type == SurfaceType::ColorTexture) {
        // Attach the source as a color attachment, clearing any depth-stencil binding.
        switch (src_params.target) {
        case SurfaceTarget::Texture2D:
            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0);
            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                                   0, 0);
            break;
        case SurfaceTarget::TextureCubemap:
            glFramebufferTexture2D(
                GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
                src_surface->Texture().handle, 0);
            glFramebufferTexture2D(
                GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
            break;
        case SurfaceTarget::Texture2DArray:
            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
                                      src_surface->Texture().handle, 0, 0);
            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
            break;
        case SurfaceTarget::Texture3D:
            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
                                   SurfaceTargetToGL(src_params.target),
                                   src_surface->Texture().handle, 0, 0);
            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
                                   SurfaceTargetToGL(src_params.target), 0, 0, 0);
            break;
        default:
            // Treat unhandled targets as plain 2D textures.
            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0);
            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                                   0, 0);
            break;
        }

        // Attach the destination the same way on the draw framebuffer.
        switch (dst_params.target) {
        case SurfaceTarget::Texture2D:
            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                                   0, 0);
            break;
        case SurfaceTarget::TextureCubemap:
            glFramebufferTexture2D(
                GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
                dst_surface->Texture().handle, 0);
            glFramebufferTexture2D(
                GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
            break;
        case SurfaceTarget::Texture2DArray:
            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
                                      dst_surface->Texture().handle, 0, 0);
            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
            break;

        case SurfaceTarget::Texture3D:
            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
                                   SurfaceTargetToGL(dst_params.target),
                                   dst_surface->Texture().handle, 0, 0);
            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
                                   SurfaceTargetToGL(dst_params.target), 0, 0, 0);
            break;
        default:
            // Treat unhandled targets as plain 2D textures.
            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                                   0, 0);
            break;
        }

        buffers = GL_COLOR_BUFFER_BIT;
    } else if (src_params.type == SurfaceType::Depth) {
        // Depth-only: clear color/stencil attachments, attach depth on both framebuffers.
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
                               GL_TEXTURE_2D, 0, 0);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
                               src_surface->Texture().handle, 0);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);

        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
                               GL_TEXTURE_2D, 0, 0);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
                               dst_surface->Texture().handle, 0);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);

        buffers = GL_DEPTH_BUFFER_BIT;
    } else if (src_params.type == SurfaceType::DepthStencil) {
        // Combined depth-stencil: clear color, attach the combined attachment on both.
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
                               GL_TEXTURE_2D, 0, 0);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                               src_surface->Texture().handle, 0);

        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
                               GL_TEXTURE_2D, 0, 0);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                               dst_surface->Texture().handle, 0);

        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
    }

    glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
                      dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
                      buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);

    return true;
}
1150
1151void RasterizerCacheOpenGL::FermiCopySurface(
1152 const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
1153 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
1154 const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
1155
1156 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
1157 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
1158
1159 ASSERT(src_params.pixel_format == dst_params.pixel_format);
1160 ASSERT(src_params.block_height == dst_params.block_height);
1161 ASSERT(src_params.is_tiled == dst_params.is_tiled);
1162 ASSERT(src_params.depth == dst_params.depth);
1163 ASSERT(src_params.target == dst_params.target);
1164 ASSERT(src_params.rt.index == dst_params.rt.index);
1165
1166 auto src_surface = GetSurface(src_params, true);
1167 auto dst_surface = GetSurface(dst_params, true);
1168
1169 BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle,
1170 draw_framebuffer.handle);
1171
1172 dst_surface->MarkAsModified(true, *this);
1173}
1174
1175void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
1176 const Surface& dst_surface) {
1177 const auto& src_params{src_surface->GetSurfaceParams()};
1178 const auto& dst_params{dst_surface->GetSurfaceParams()};
1179
1180 // Flush enough memory for both the source and destination surface
1181 FlushRegion(ToCacheAddr(src_params.host_ptr),
1182 std::max(src_params.MemorySize(), dst_params.MemorySize()));
1183
1184 LoadSurface(dst_surface);
1185}
1186
1187Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1188 const SurfaceParams& new_params) {
1189 // Verify surface is compatible for blitting
1190 auto old_params{old_surface->GetSurfaceParams()};
1191
1192 // Get a new surface with the new parameters, and blit the previous surface to it
1193 Surface new_surface{GetUncachedSurface(new_params)};
1194
1195 // With use_accurate_gpu_emulation enabled, do an accurate surface copy
1196 if (Settings::values.use_accurate_gpu_emulation) {
1197 AccurateCopySurface(old_surface, new_surface);
1198 return new_surface;
1199 }
1200
1201 const bool old_compressed =
1202 GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed;
1203 const bool new_compressed =
1204 GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed;
1205 const bool compatible_formats =
1206 GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) &&
1207 !(old_compressed || new_compressed);
1208 // For compatible surfaces, we can just do fast glCopyImageSubData based copy
1209 if (old_params.target == new_params.target && old_params.depth == new_params.depth &&
1210 old_params.depth == 1 && compatible_formats) {
1211 FastCopySurface(old_surface, new_surface);
1212 return new_surface;
1213 }
1214
1215 switch (new_params.target) {
1216 case SurfaceTarget::Texture2D:
1217 CopySurface(old_surface, new_surface, copy_pbo.handle);
1218 break;
1219 case SurfaceTarget::Texture3D:
1220 AccurateCopySurface(old_surface, new_surface);
1221 break;
1222 case SurfaceTarget::TextureCubemap:
1223 case SurfaceTarget::Texture2DArray:
1224 case SurfaceTarget::TextureCubeArray:
1225 if (compatible_formats)
1226 FastLayeredCopySurface(old_surface, new_surface);
1227 else {
1228 AccurateCopySurface(old_surface, new_surface);
1229 }
1230 break;
1231 default:
1232 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
1233 static_cast<u32>(new_params.target));
1234 UNREACHABLE();
1235 }
1236
1237 return new_surface;
1238}
1239
/// Returns the cached surface registered at host_ptr, or an empty Surface if none exists.
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
    return TryGet(host_ptr);
}
1243
1244void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
1245 const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())};
1246 surface_reserve[surface_reserve_key] = surface;
1247}
1248
1249Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params) {
1250 const auto& surface_reserve_key{SurfaceReserveKey::Create(params)};
1251 auto search{surface_reserve.find(surface_reserve_key)};
1252 if (search != surface_reserve.end()) {
1253 return search->second;
1254 }
1255 return {};
1256}
1257
1258static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
1259 u32 height) {
1260 for (u32 i = 0; i < params.max_mip_level; i++) {
1261 if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
1262 return {i};
1263 }
1264 }
1265 return {};
1266}
1267
1268static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) {
1269 const std::size_t size{params.LayerMemorySize()};
1270 GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
1271 for (u32 i = 0; i < params.depth; i++) {
1272 if (start == addr) {
1273 return {i};
1274 }
1275 start += size;
1276 }
1277 return {};
1278}
1279
1280static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
1281 const Surface blitted_surface) {
1282 const auto& dst_params = blitted_surface->GetSurfaceParams();
1283 const auto& src_params = render_surface->GetSurfaceParams();
1284 const std::size_t src_memory_size = src_params.size_in_bytes;
1285 const std::optional<u32> level =
1286 TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
1287 if (level.has_value()) {
1288 if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
1289 src_params.height == dst_params.MipHeight(*level) &&
1290 src_params.block_height >= dst_params.MipBlockHeight(*level)) {
1291 const std::optional<u32> slot =
1292 TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
1293 if (slot.has_value()) {
1294 glCopyImageSubData(render_surface->Texture().handle,
1295 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
1296 blitted_surface->Texture().handle,
1297 SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
1298 dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
1299 blitted_surface->MarkAsModified(true, cache);
1300 return true;
1301 }
1302 }
1303 }
1304 return false;
1305}
1306
1307static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
1308 const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
1309 const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
1310 if (bound2 > bound1)
1311 return true;
1312 const auto& dst_params = blitted_surface->GetSurfaceParams();
1313 const auto& src_params = render_surface->GetSurfaceParams();
1314 return (dst_params.component_type != src_params.component_type);
1315}
1316
1317static bool IsReinterpretInvalidSecond(const Surface render_surface,
1318 const Surface blitted_surface) {
1319 const auto& dst_params = blitted_surface->GetSurfaceParams();
1320 const auto& src_params = render_surface->GetSurfaceParams();
1321 return (dst_params.height > src_params.height && dst_params.width > src_params.width);
1322}
1323
1324bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
1325 Surface intersect) {
1326 if (IsReinterpretInvalid(triggering_surface, intersect)) {
1327 Unregister(intersect);
1328 return false;
1329 }
1330 if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
1331 if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
1332 Unregister(intersect);
1333 return false;
1334 }
1335 FlushObject(intersect);
1336 FlushObject(triggering_surface);
1337 intersect->MarkForReload(true);
1338 }
1339 return true;
1340}
1341
1342void RasterizerCacheOpenGL::SignalPreDrawCall() {
1343 if (texception && GLAD_GL_ARB_texture_barrier) {
1344 glTextureBarrier();
1345 }
1346 texception = false;
1347}
1348
1349void RasterizerCacheOpenGL::SignalPostDrawCall() {
1350 for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
1351 if (current_color_buffers[i] != nullptr) {
1352 Surface intersect =
1353 CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
1354 if (intersect != nullptr) {
1355 PartialReinterpretSurface(current_color_buffers[i], intersect);
1356 texception = true;
1357 }
1358 }
1359 }
1360}
1361
1362} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
deleted file mode 100644
index 6263ef3e7..000000000
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ /dev/null
@@ -1,572 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <string>
10#include <tuple>
11#include <vector>
12
13#include "common/alignment.h"
14#include "common/bit_util.h"
15#include "common/common_types.h"
16#include "common/hash.h"
17#include "common/math_util.h"
18#include "video_core/engines/fermi_2d.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/rasterizer_cache.h"
21#include "video_core/renderer_opengl/gl_resource_manager.h"
22#include "video_core/renderer_opengl/gl_shader_gen.h"
23#include "video_core/surface.h"
24#include "video_core/textures/decoders.h"
25#include "video_core/textures/texture.h"
26
27namespace OpenGL {
28
29class CachedSurface;
30using Surface = std::shared_ptr<CachedSurface>;
31using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
32
33using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
34using SurfaceType = VideoCore::Surface::SurfaceType;
35using PixelFormat = VideoCore::Surface::PixelFormat;
36using ComponentType = VideoCore::Surface::ComponentType;
37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
38
39struct SurfaceParams {
40 enum class SurfaceClass {
41 Uploaded,
42 RenderTarget,
43 DepthBuffer,
44 Copy,
45 };
46
47 static std::string SurfaceTargetName(SurfaceTarget target) {
48 switch (target) {
49 case SurfaceTarget::Texture1D:
50 return "Texture1D";
51 case SurfaceTarget::Texture2D:
52 return "Texture2D";
53 case SurfaceTarget::Texture3D:
54 return "Texture3D";
55 case SurfaceTarget::Texture1DArray:
56 return "Texture1DArray";
57 case SurfaceTarget::Texture2DArray:
58 return "Texture2DArray";
59 case SurfaceTarget::TextureCubemap:
60 return "TextureCubemap";
61 case SurfaceTarget::TextureCubeArray:
62 return "TextureCubeArray";
63 default:
64 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
65 UNREACHABLE();
66 return fmt::format("TextureUnknown({})", static_cast<u32>(target));
67 }
68 }
69
70 u32 GetFormatBpp() const {
71 return VideoCore::Surface::GetFormatBpp(pixel_format);
72 }
73
74 /// Returns the rectangle corresponding to this surface
75 Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
76
77 /// Returns the total size of this surface in bytes, adjusted for compression
78 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
79 const u32 compression_factor{GetCompressionFactor(pixel_format)};
80 const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
81 const size_t uncompressed_size{
82 Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width,
83 height, depth, block_height, block_depth)};
84
85 // Divide by compression_factor^2, as height and width are factored by this
86 return uncompressed_size / (compression_factor * compression_factor);
87 }
88
89 /// Returns the size of this surface as an OpenGL texture in bytes
90 std::size_t SizeInBytesGL() const {
91 return SizeInBytesRaw(true);
92 }
93
94 /// Returns the size of this surface as a cube face in bytes
95 std::size_t SizeInBytesCubeFace() const {
96 return size_in_bytes / 6;
97 }
98
99 /// Returns the size of this surface as an OpenGL cube face in bytes
100 std::size_t SizeInBytesCubeFaceGL() const {
101 return size_in_bytes_gl / 6;
102 }
103
104 /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps.
105 std::size_t MemorySize() const {
106 std::size_t size = InnerMemorySize(false, is_layered);
107 if (is_layered)
108 return size * depth;
109 return size;
110 }
111
112 /// Returns true if the parameters constitute a valid rasterizer surface.
113 bool IsValid() const {
114 return gpu_addr && host_ptr && height && width;
115 }
116
117 /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
118 /// mipmaps.
119 std::size_t LayerMemorySize() const {
120 return InnerMemorySize(false, true);
121 }
122
123 /// Returns the size of a layer of this surface in OpenGL.
124 std::size_t LayerSizeGL(u32 mip_level) const {
125 return InnerMipmapMemorySize(mip_level, true, is_layered, false);
126 }
127
128 std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const {
129 std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed);
130 if (is_layered)
131 return size * depth;
132 return size;
133 }
134
135 std::size_t GetMipmapLevelOffset(u32 mip_level) const {
136 std::size_t offset = 0;
137 for (u32 i = 0; i < mip_level; i++)
138 offset += InnerMipmapMemorySize(i, false, is_layered);
139 return offset;
140 }
141
142 std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const {
143 std::size_t offset = 0;
144 for (u32 i = 0; i < mip_level; i++)
145 offset += InnerMipmapMemorySize(i, true, is_layered);
146 return offset;
147 }
148
149 std::size_t GetMipmapSingleSize(u32 mip_level) const {
150 return InnerMipmapMemorySize(mip_level, false, is_layered);
151 }
152
153 u32 MipWidth(u32 mip_level) const {
154 return std::max(1U, width >> mip_level);
155 }
156
157 u32 MipWidthGobAligned(u32 mip_level) const {
158 return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
159 }
160
161 u32 MipHeight(u32 mip_level) const {
162 return std::max(1U, height >> mip_level);
163 }
164
165 u32 MipDepth(u32 mip_level) const {
166 return is_layered ? depth : std::max(1U, depth >> mip_level);
167 }
168
169 // Auto block resizing algorithm from:
170 // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
171 u32 MipBlockHeight(u32 mip_level) const {
172 if (mip_level == 0)
173 return block_height;
174 u32 alt_height = MipHeight(mip_level);
175 u32 h = GetDefaultBlockHeight(pixel_format);
176 u32 blocks_in_y = (alt_height + h - 1) / h;
177 u32 bh = 16;
178 while (bh > 1 && blocks_in_y <= bh * 4) {
179 bh >>= 1;
180 }
181 return bh;
182 }
183
184 u32 MipBlockDepth(u32 mip_level) const {
185 if (mip_level == 0) {
186 return block_depth;
187 }
188
189 if (is_layered) {
190 return 1;
191 }
192
193 const u32 mip_depth = MipDepth(mip_level);
194 u32 bd = 32;
195 while (bd > 1 && mip_depth * 2 <= bd) {
196 bd >>= 1;
197 }
198
199 if (bd == 32) {
200 const u32 bh = MipBlockHeight(mip_level);
201 if (bh >= 4) {
202 return 16;
203 }
204 }
205
206 return bd;
207 }
208
209 u32 RowAlign(u32 mip_level) const {
210 const u32 m_width = MipWidth(mip_level);
211 const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format);
212 const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel);
213 return (1U << l2);
214 }
215
216 /// Creates SurfaceParams from a texture configuration
217 static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
218 const GLShader::SamplerEntry& entry);
219
220 /// Creates SurfaceParams from a framebuffer configuration
221 static SurfaceParams CreateForFramebuffer(std::size_t index);
222
223 /// Creates SurfaceParams for a depth buffer configuration
224 static SurfaceParams CreateForDepthBuffer(
225 u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
226 u32 block_width, u32 block_height, u32 block_depth,
227 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
228
229 /// Creates SurfaceParams for a Fermi2D surface copy
230 static SurfaceParams CreateForFermiCopySurface(
231 const Tegra::Engines::Fermi2D::Regs::Surface& config);
232
233 /// Checks if surfaces are compatible for caching
234 bool IsCompatibleSurface(const SurfaceParams& other) const {
235 if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) ==
236 std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
237 other.depth, other.is_tiled)) {
238 if (!is_tiled)
239 return true;
240 return std::tie(block_height, block_depth, tile_width_spacing) ==
241 std::tie(other.block_height, other.block_depth, other.tile_width_spacing);
242 }
243 return false;
244 }
245
246 /// Initializes parameters for caching, should be called after everything has been initialized
247 void InitCacheParameters(GPUVAddr gpu_addr);
248
249 std::string TargetName() const {
250 switch (target) {
251 case SurfaceTarget::Texture1D:
252 return "1D";
253 case SurfaceTarget::Texture2D:
254 return "2D";
255 case SurfaceTarget::Texture3D:
256 return "3D";
257 case SurfaceTarget::Texture1DArray:
258 return "1DArray";
259 case SurfaceTarget::Texture2DArray:
260 return "2DArray";
261 case SurfaceTarget::TextureCubemap:
262 return "Cube";
263 default:
264 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
265 UNREACHABLE();
266 return fmt::format("TUK({})", static_cast<u32>(target));
267 }
268 }
269
270 std::string ClassName() const {
271 switch (identity) {
272 case SurfaceClass::Uploaded:
273 return "UP";
274 case SurfaceClass::RenderTarget:
275 return "RT";
276 case SurfaceClass::DepthBuffer:
277 return "DB";
278 case SurfaceClass::Copy:
279 return "CP";
280 default:
281 LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity));
282 UNREACHABLE();
283 return fmt::format("CUK({})", static_cast<u32>(identity));
284 }
285 }
286
287 std::string IdentityString() const {
288 return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L');
289 }
290
291 bool is_tiled;
292 u32 block_width;
293 u32 block_height;
294 u32 block_depth;
295 u32 tile_width_spacing;
296 PixelFormat pixel_format;
297 ComponentType component_type;
298 SurfaceType type;
299 u32 width;
300 u32 height;
301 u32 depth;
302 u32 unaligned_height;
303 u32 pitch;
304 SurfaceTarget target;
305 SurfaceClass identity;
306 u32 max_mip_level;
307 bool is_layered;
308 bool is_array;
309 bool srgb_conversion;
310 // Parameters used for caching
311 u8* host_ptr;
312 GPUVAddr gpu_addr;
313 std::size_t size_in_bytes;
314 std::size_t size_in_bytes_gl;
315
316 // Render target specific parameters, not used in caching
317 struct {
318 u32 index;
319 u32 array_mode;
320 u32 volume;
321 u32 layer_stride;
322 u32 base_layer;
323 } rt;
324
325private:
326 std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false,
327 bool uncompressed = false) const;
328 std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false,
329 bool uncompressed = false) const;
330};
331
332}; // namespace OpenGL
333
334/// Hashable variation of SurfaceParams, used for a key in the surface cache
335struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
336 static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
337 SurfaceReserveKey res;
338 res.state = params;
339 res.state.identity = {}; // Ignore the origin of the texture
340 res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
341 res.state.rt = {}; // Ignore rt config in caching
342 return res;
343 }
344};
345namespace std {
346template <>
347struct hash<SurfaceReserveKey> {
348 std::size_t operator()(const SurfaceReserveKey& k) const {
349 return k.Hash();
350 }
351};
352} // namespace std
353
354namespace OpenGL {
355
356class RasterizerOpenGL;
357
358// This is used to store temporary big buffers,
359// instead of creating/destroying all the time
360struct RasterizerTemporaryMemory {
361 std::vector<std::vector<u8>> gl_buffer;
362};
363
364class CachedSurface final : public RasterizerCacheObject {
365public:
366 explicit CachedSurface(const SurfaceParams& params);
367
368 VAddr GetCpuAddr() const override {
369 return cpu_addr;
370 }
371
372 std::size_t GetSizeInBytes() const override {
373 return cached_size_in_bytes;
374 }
375
376 std::size_t GetMemorySize() const {
377 return memory_size;
378 }
379
380 const OGLTexture& Texture() const {
381 return texture;
382 }
383
384 const OGLTexture& Texture(bool as_array) {
385 if (params.is_array == as_array) {
386 return texture;
387 } else {
388 EnsureTextureDiscrepantView();
389 return discrepant_view;
390 }
391 }
392
393 GLenum Target() const {
394 return gl_target;
395 }
396
397 const SurfaceParams& GetSurfaceParams() const {
398 return params;
399 }
400
401 // Read/Write data in Switch memory to/from gl_buffer
402 void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
403 void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
404
405 // Upload data in gl_buffer to this surface's texture
406 void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
407 GLuint draw_fb_handle);
408
409 void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
410 Tegra::Texture::SwizzleSource swizzle_y,
411 Tegra::Texture::SwizzleSource swizzle_z,
412 Tegra::Texture::SwizzleSource swizzle_w);
413
414 void MarkReinterpreted() {
415 reinterpreted = true;
416 }
417
418 bool IsReinterpreted() const {
419 return reinterpreted;
420 }
421
422 void MarkForReload(bool reload) {
423 must_reload = reload;
424 }
425
426 bool MustReload() const {
427 return must_reload;
428 }
429
430 bool IsUploaded() const {
431 return params.identity == SurfaceParams::SurfaceClass::Uploaded;
432 }
433
434private:
435 void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
436 GLuint read_fb_handle, GLuint draw_fb_handle);
437
438 void EnsureTextureDiscrepantView();
439
440 OGLTexture texture;
441 OGLTexture discrepant_view;
442 SurfaceParams params{};
443 GLenum gl_target{};
444 GLenum gl_internal_format{};
445 std::size_t cached_size_in_bytes{};
446 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
447 std::size_t memory_size;
448 bool reinterpreted = false;
449 bool must_reload = false;
450 VAddr cpu_addr{};
451};
452
453class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
454public:
455 explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer);
456
457 /// Get a surface based on the texture configuration
458 Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
459 const GLShader::SamplerEntry& entry);
460
461 /// Get the depth surface based on the framebuffer configuration
462 Surface GetDepthBufferSurface(bool preserve_contents);
463
464 /// Get the color surface based on the framebuffer configuration and the specified render target
465 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
466
467 /// Tries to find a framebuffer using on the provided CPU address
468 Surface TryFindFramebufferSurface(const u8* host_ptr) const;
469
470 /// Copies the contents of one surface to another
471 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
472 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
473 const Common::Rectangle<u32>& src_rect,
474 const Common::Rectangle<u32>& dst_rect);
475
476 void SignalPreDrawCall();
477 void SignalPostDrawCall();
478
479protected:
480 void FlushObjectInner(const Surface& object) override {
481 object->FlushGLBuffer(temporal_memory);
482 }
483
484private:
485 void LoadSurface(const Surface& surface);
486 Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
487
488 /// Gets an uncached surface, creating it if need be
489 Surface GetUncachedSurface(const SurfaceParams& params);
490
491 /// Recreates a surface with new parameters
492 Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params);
493
494 /// Reserves a unique surface that can be reused later
495 void ReserveSurface(const Surface& surface);
496
497 /// Tries to get a reserved surface for the specified parameters
498 Surface TryGetReservedSurface(const SurfaceParams& params);
499
500 // Partialy reinterpret a surface based on a triggering_surface that collides with it.
501 // returns true if the reinterpret was successful, false in case it was not.
502 bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
503
504 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
505 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
506 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
507 void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
508 void CopySurface(const Surface& src_surface, const Surface& dst_surface,
509 const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
510 const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);
511
512 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
513 /// previously been used. This is to prevent surfaces from being constantly created and
514 /// destroyed when used with different surface parameters.
515 std::unordered_map<SurfaceReserveKey, Surface> surface_reserve;
516
517 OGLFramebuffer read_framebuffer;
518 OGLFramebuffer draw_framebuffer;
519
520 bool texception = false;
521
522 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
523 /// using the new format.
524 OGLBuffer copy_pbo;
525
526 std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
527 std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
528 Surface last_depth_buffer;
529
530 RasterizerTemporaryMemory temporal_memory;
531
532 using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
533 using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
534
535 static auto GetReinterpretInterval(const Surface& object) {
536 return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
537 object->GetCacheAddr() + object->GetMemorySize() - 1);
538 }
539
540 // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
541 SurfaceIntervalCache reinterpreted_surfaces;
542
543 void RegisterReinterpretSurface(Surface reinterpret_surface) {
544 auto interval = GetReinterpretInterval(reinterpret_surface);
545 reinterpreted_surfaces.insert({interval, reinterpret_surface});
546 reinterpret_surface->MarkReinterpreted();
547 }
548
549 Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
550 const SurfaceInterval interval{addr};
551 for (auto& pair :
552 boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
553 return pair.second;
554 }
555 return nullptr;
556 }
557
558 void Register(const Surface& object) override {
559 RasterizerCache<Surface>::Register(object);
560 }
561
562 /// Unregisters an object from the cache
563 void Unregister(const Surface& object) override {
564 if (object->IsReinterpreted()) {
565 auto interval = GetReinterpretInterval(object);
566 reinterpreted_surfaces.erase(interval);
567 }
568 RasterizerCache<Surface>::Unregister(object);
569 }
570};
571
572} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index bfe666a73..5c96c1d46 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -33,6 +33,24 @@ void OGLTexture::Release() {
33 handle = 0; 33 handle = 0;
34} 34}
35 35
36void OGLTextureView::Create() {
37 if (handle != 0)
38 return;
39
40 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
41 glGenTextures(1, &handle);
42}
43
44void OGLTextureView::Release() {
45 if (handle == 0)
46 return;
47
48 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
49 glDeleteTextures(1, &handle);
50 OpenGLState::GetCurState().UnbindTexture(handle).Apply();
51 handle = 0;
52}
53
36void OGLSampler::Create() { 54void OGLSampler::Create() {
37 if (handle != 0) 55 if (handle != 0)
38 return; 56 return;
@@ -130,6 +148,12 @@ void OGLBuffer::Release() {
130 handle = 0; 148 handle = 0;
131} 149}
132 150
151void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) {
152 ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; });
153
154 glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY);
155}
156
133void OGLSync::Create() { 157void OGLSync::Create() {
134 if (handle != 0) 158 if (handle != 0)
135 return; 159 return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index fbb93ee49..3a85a1d4c 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -36,6 +36,31 @@ public:
36 GLuint handle = 0; 36 GLuint handle = 0;
37}; 37};
38 38
39class OGLTextureView : private NonCopyable {
40public:
41 OGLTextureView() = default;
42
43 OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
44
45 ~OGLTextureView() {
46 Release();
47 }
48
49 OGLTextureView& operator=(OGLTextureView&& o) noexcept {
50 Release();
51 handle = std::exchange(o.handle, 0);
52 return *this;
53 }
54
55 /// Creates a new internal OpenGL resource and stores the handle
56 void Create();
57
58 /// Deletes the internal OpenGL resource
59 void Release();
60
61 GLuint handle = 0;
62};
63
39class OGLSampler : private NonCopyable { 64class OGLSampler : private NonCopyable {
40public: 65public:
41 OGLSampler() = default; 66 OGLSampler() = default;
@@ -161,6 +186,9 @@ public:
161 /// Deletes the internal OpenGL resource 186 /// Deletes the internal OpenGL resource
162 void Release(); 187 void Release();
163 188
189 // Converts the buffer into a stream copy buffer with a fixed size
190 void MakeStreamCopy(std::size_t buffer_size);
191
164 GLuint handle = 0; 192 GLuint handle = 0;
165}; 193};
166 194
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
index defbc2d81..34ee37f00 100644
--- a/src/video_core/renderer_opengl/gl_sampler_cache.h
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.h
@@ -17,9 +17,9 @@ public:
17 ~SamplerCacheOpenGL(); 17 ~SamplerCacheOpenGL();
18 18
19protected: 19protected:
20 OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; 20 OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
21 21
22 GLuint ToSamplerType(const OGLSampler& sampler) const; 22 GLuint ToSamplerType(const OGLSampler& sampler) const override;
23}; 23};
24 24
25} // namespace OpenGL 25} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index ac8a9e6b7..909ccb82c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -23,13 +23,13 @@ namespace OpenGL {
23 23
24using VideoCommon::Shader::ProgramCode; 24using VideoCommon::Shader::ProgramCode;
25 25
26// One UBO is always reserved for emulation values 26// One UBO is always reserved for emulation values on staged shaders
27constexpr u32 RESERVED_UBOS = 1; 27constexpr u32 STAGE_RESERVED_UBOS = 1;
28 28
29struct UnspecializedShader { 29struct UnspecializedShader {
30 std::string code; 30 std::string code;
31 GLShader::ShaderEntries entries; 31 GLShader::ShaderEntries entries;
32 Maxwell::ShaderProgram program_type; 32 ProgramType program_type;
33}; 33};
34 34
35namespace { 35namespace {
@@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
55} 55}
56 56
57/// Gets the shader type from a Maxwell program type 57/// Gets the shader type from a Maxwell program type
58constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { 58constexpr GLenum GetShaderType(ProgramType program_type) {
59 switch (program_type) { 59 switch (program_type) {
60 case Maxwell::ShaderProgram::VertexA: 60 case ProgramType::VertexA:
61 case Maxwell::ShaderProgram::VertexB: 61 case ProgramType::VertexB:
62 return GL_VERTEX_SHADER; 62 return GL_VERTEX_SHADER;
63 case Maxwell::ShaderProgram::Geometry: 63 case ProgramType::Geometry:
64 return GL_GEOMETRY_SHADER; 64 return GL_GEOMETRY_SHADER;
65 case Maxwell::ShaderProgram::Fragment: 65 case ProgramType::Fragment:
66 return GL_FRAGMENT_SHADER; 66 return GL_FRAGMENT_SHADER;
67 case ProgramType::Compute:
68 return GL_COMPUTE_SHADER;
67 default: 69 default:
68 return GL_NONE; 70 return GL_NONE;
69 } 71 }
@@ -100,18 +102,44 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen
100 } 102 }
101} 103}
102 104
105ProgramType GetProgramType(Maxwell::ShaderProgram program) {
106 switch (program) {
107 case Maxwell::ShaderProgram::VertexA:
108 return ProgramType::VertexA;
109 case Maxwell::ShaderProgram::VertexB:
110 return ProgramType::VertexB;
111 case Maxwell::ShaderProgram::TesselationControl:
112 return ProgramType::TessellationControl;
113 case Maxwell::ShaderProgram::TesselationEval:
114 return ProgramType::TessellationEval;
115 case Maxwell::ShaderProgram::Geometry:
116 return ProgramType::Geometry;
117 case Maxwell::ShaderProgram::Fragment:
118 return ProgramType::Fragment;
119 }
120 UNREACHABLE();
121 return {};
122}
123
103/// Calculates the size of a program stream 124/// Calculates the size of a program stream
104std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { 125std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
105 constexpr std::size_t start_offset = 10; 126 constexpr std::size_t start_offset = 10;
127 // This is the encoded version of BRA that jumps to itself. All Nvidia
128 // shaders end with one.
129 constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
130 constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
106 std::size_t offset = start_offset; 131 std::size_t offset = start_offset;
107 std::size_t size = start_offset * sizeof(u64); 132 std::size_t size = start_offset * sizeof(u64);
108 while (offset < program.size()) { 133 while (offset < program.size()) {
109 const u64 instruction = program[offset]; 134 const u64 instruction = program[offset];
110 if (!IsSchedInstruction(offset, start_offset)) { 135 if (!IsSchedInstruction(offset, start_offset)) {
111 if (instruction == 0 || (instruction >> 52) == 0x50b) { 136 if ((instruction & mask) == self_jumping_branch) {
112 // End on Maxwell's "nop" instruction 137 // End on Maxwell's "nop" instruction
113 break; 138 break;
114 } 139 }
140 if (instruction == 0) {
141 break;
142 }
115 } 143 }
116 size += sizeof(u64); 144 size += sizeof(u64);
117 offset++; 145 offset++;
@@ -121,11 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
121} 149}
122 150
123/// Hashes one (or two) program streams 151/// Hashes one (or two) program streams
124u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, 152u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
125 const ProgramCode& code_b) { 153 const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
126 u64 unique_identifier = 154 if (size_a == 0) {
127 Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code)); 155 size_a = CalculateProgramSize(code);
128 if (program_type != Maxwell::ShaderProgram::VertexA) { 156 }
157 u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
158 if (program_type != ProgramType::VertexA) {
129 return unique_identifier; 159 return unique_identifier;
130 } 160 }
131 // VertexA programs include two programs 161 // VertexA programs include two programs
@@ -133,46 +163,69 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
133 std::size_t seed = 0; 163 std::size_t seed = 0;
134 boost::hash_combine(seed, unique_identifier); 164 boost::hash_combine(seed, unique_identifier);
135 165
136 const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), 166 if (size_b == 0) {
137 CalculateProgramSize(code_b)); 167 size_b = CalculateProgramSize(code_b);
168 }
169 const u64 identifier_b =
170 Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
138 boost::hash_combine(seed, identifier_b); 171 boost::hash_combine(seed, identifier_b);
139 return static_cast<u64>(seed); 172 return static_cast<u64>(seed);
140} 173}
141 174
142/// Creates an unspecialized program from code streams 175/// Creates an unspecialized program from code streams
143GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, 176GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
144 ProgramCode program_code, ProgramCode program_code_b) { 177 ProgramCode program_code, ProgramCode program_code_b) {
145 GLShader::ShaderSetup setup(program_code); 178 GLShader::ShaderSetup setup(program_code);
146 if (program_type == Maxwell::ShaderProgram::VertexA) { 179 setup.program.size_a = CalculateProgramSize(program_code);
180 setup.program.size_b = 0;
181 if (program_type == ProgramType::VertexA) {
147 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. 182 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
148 // Conventional HW does not support this, so we combine VertexA and VertexB into one 183 // Conventional HW does not support this, so we combine VertexA and VertexB into one
149 // stage here. 184 // stage here.
150 setup.SetProgramB(program_code_b); 185 setup.SetProgramB(program_code_b);
186 setup.program.size_b = CalculateProgramSize(program_code_b);
151 } 187 }
152 setup.program.unique_identifier = 188 setup.program.unique_identifier = GetUniqueIdentifier(
153 GetUniqueIdentifier(program_type, program_code, program_code_b); 189 program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
154 190
155 switch (program_type) { 191 switch (program_type) {
156 case Maxwell::ShaderProgram::VertexA: 192 case ProgramType::VertexA:
157 case Maxwell::ShaderProgram::VertexB: 193 case ProgramType::VertexB:
158 return GLShader::GenerateVertexShader(device, setup); 194 return GLShader::GenerateVertexShader(device, setup);
159 case Maxwell::ShaderProgram::Geometry: 195 case ProgramType::Geometry:
160 return GLShader::GenerateGeometryShader(device, setup); 196 return GLShader::GenerateGeometryShader(device, setup);
161 case Maxwell::ShaderProgram::Fragment: 197 case ProgramType::Fragment:
162 return GLShader::GenerateFragmentShader(device, setup); 198 return GLShader::GenerateFragmentShader(device, setup);
199 case ProgramType::Compute:
200 return GLShader::GenerateComputeShader(device, setup);
163 default: 201 default:
164 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); 202 UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
165 UNREACHABLE();
166 return {}; 203 return {};
167 } 204 }
168} 205}
169 206
170CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, 207CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
171 Maxwell::ShaderProgram program_type, BaseBindings base_bindings, 208 ProgramType program_type, const ProgramVariant& variant,
172 GLenum primitive_mode, bool hint_retrievable = false) { 209 bool hint_retrievable = false) {
210 auto base_bindings{variant.base_bindings};
211 const auto primitive_mode{variant.primitive_mode};
212 const auto texture_buffer_usage{variant.texture_buffer_usage};
213
173 std::string source = "#version 430 core\n" 214 std::string source = "#version 430 core\n"
174 "#extension GL_ARB_separate_shader_objects : enable\n\n"; 215 "#extension GL_ARB_separate_shader_objects : enable\n"
175 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); 216 "#extension GL_NV_gpu_shader5 : enable\n"
217 "#extension GL_NV_shader_thread_group : enable\n";
218 if (entries.shader_viewport_layer_array) {
219 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
220 }
221 if (program_type == ProgramType::Compute) {
222 source += "#extension GL_ARB_compute_variable_group_size : require\n";
223 }
224 source += '\n';
225
226 if (program_type != ProgramType::Compute) {
227 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
228 }
176 229
177 for (const auto& cbuf : entries.const_buffers) { 230 for (const auto& cbuf : entries.const_buffers) {
178 source += 231 source +=
@@ -186,15 +239,34 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
186 source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), 239 source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
187 base_bindings.sampler++); 240 base_bindings.sampler++);
188 } 241 }
242 for (const auto& image : entries.images) {
243 source +=
244 fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
245 }
246
247 // Transform 1D textures to texture samplers by declaring its preprocessor macros.
248 for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) {
249 if (!texture_buffer_usage.test(i)) {
250 continue;
251 }
252 source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i);
253 }
254 if (texture_buffer_usage.any()) {
255 source += '\n';
256 }
189 257
190 if (program_type == Maxwell::ShaderProgram::Geometry) { 258 if (program_type == ProgramType::Geometry) {
191 const auto [glsl_topology, debug_name, max_vertices] = 259 const auto [glsl_topology, debug_name, max_vertices] =
192 GetPrimitiveDescription(primitive_mode); 260 GetPrimitiveDescription(primitive_mode);
193 261
194 source += "layout (" + std::string(glsl_topology) + ") in;\n"; 262 source += "layout (" + std::string(glsl_topology) + ") in;\n\n";
195 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; 263 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
196 } 264 }
265 if (program_type == ProgramType::Compute) {
266 source += "layout (local_size_variable) in;\n";
267 }
197 268
269 source += '\n';
198 source += code; 270 source += code;
199 271
200 OGLShader shader; 272 OGLShader shader;
@@ -221,131 +293,97 @@ std::set<GLenum> GetSupportedFormats() {
221 293
222} // Anonymous namespace 294} // Anonymous namespace
223 295
224CachedShader::CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier, 296CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
225 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, 297 GLShader::ProgramResult result)
226 const PrecompiledPrograms& precompiled_programs, 298 : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr},
227 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) 299 unique_identifier{params.unique_identifier}, program_type{program_type},
228 : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr}, 300 disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs},
229 unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache}, 301 entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {}
230 precompiled_programs{precompiled_programs} { 302
231 const std::size_t code_size{CalculateProgramSize(program_code)}; 303Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
232 const std::size_t code_size_b{program_code_b.empty() ? 0 304 Maxwell::ShaderProgram program_type,
233 : CalculateProgramSize(program_code_b)}; 305 ProgramCode&& program_code,
234 GLShader::ProgramResult program_result{ 306 ProgramCode&& program_code_b) {
235 CreateProgram(device, program_type, program_code, program_code_b)}; 307 const auto code_size{CalculateProgramSize(program_code)};
236 if (program_result.first.empty()) { 308 const auto code_size_b{CalculateProgramSize(program_code_b)};
309 auto result{
310 CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
311 if (result.first.empty()) {
237 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now 312 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
238 return; 313 return {};
239 } 314 }
240 315
241 code = program_result.first; 316 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
242 entries = program_result.second; 317 params.unique_identifier, GetProgramType(program_type),
243 shader_length = entries.shader_length; 318 static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)),
319 std::move(program_code), std::move(program_code_b)));
244 320
245 const ShaderDiskCacheRaw raw(unique_identifier, program_type, 321 return std::shared_ptr<CachedShader>(
246 static_cast<u32>(code_size / sizeof(u64)), 322 new CachedShader(params, GetProgramType(program_type), std::move(result)));
247 static_cast<u32>(code_size_b / sizeof(u64)),
248 std::move(program_code), std::move(program_code_b));
249 disk_cache.SaveRaw(raw);
250} 323}
251 324
252CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, 325Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
253 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, 326 Maxwell::ShaderProgram program_type,
254 const PrecompiledPrograms& precompiled_programs, 327 GLShader::ProgramResult result) {
255 GLShader::ProgramResult result, u8* host_ptr) 328 return std::shared_ptr<CachedShader>(
256 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, 329 new CachedShader(params, GetProgramType(program_type), std::move(result)));
257 program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{
258 precompiled_programs} {
259 code = std::move(result.first);
260 entries = result.second;
261 shader_length = entries.shader_length;
262} 330}
263 331
264std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode, 332Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
265 BaseBindings base_bindings) { 333 auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
266 GLuint handle{};
267 if (program_type == Maxwell::ShaderProgram::Geometry) {
268 handle = GetGeometryShader(primitive_mode, base_bindings);
269 } else {
270 const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
271 auto& program = entry->second;
272 if (is_cache_miss) {
273 program = TryLoadProgram(primitive_mode, base_bindings);
274 if (!program) {
275 program =
276 SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
277 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
278 }
279
280 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
281 }
282 334
283 handle = program->handle; 335 const auto code_size{CalculateProgramSize(code)};
284 } 336 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
337 static_cast<u32>(code_size / sizeof(u64)), 0,
338 std::move(code), {}));
285 339
286 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; 340 return std::shared_ptr<CachedShader>(
287 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); 341 new CachedShader(params, ProgramType::Compute, std::move(result)));
288 base_bindings.sampler += static_cast<u32>(entries.samplers.size()); 342}
289 343
290 return {handle, base_bindings}; 344Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
345 GLShader::ProgramResult result) {
346 return std::shared_ptr<CachedShader>(
347 new CachedShader(params, ProgramType::Compute, std::move(result)));
291} 348}
292 349
293GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) { 350std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
294 const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings); 351 const auto [entry, is_cache_miss] = programs.try_emplace(variant);
295 auto& programs = entry->second; 352 auto& program = entry->second;
353 if (is_cache_miss) {
354 program = TryLoadProgram(variant);
355 if (!program) {
356 program = SpecializeShader(code, entries, program_type, variant);
357 disk_cache.SaveUsage(GetUsage(variant));
358 }
296 359
297 switch (primitive_mode) { 360 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
298 case GL_POINTS:
299 return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
300 case GL_LINES:
301 case GL_LINE_STRIP:
302 return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
303 case GL_LINES_ADJACENCY:
304 case GL_LINE_STRIP_ADJACENCY:
305 return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
306 case GL_TRIANGLES:
307 case GL_TRIANGLE_STRIP:
308 case GL_TRIANGLE_FAN:
309 return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
310 case GL_TRIANGLES_ADJACENCY:
311 case GL_TRIANGLE_STRIP_ADJACENCY:
312 return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
313 default:
314 UNREACHABLE_MSG("Unknown primitive mode.");
315 return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
316 } 361 }
317}
318 362
319GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings, 363 auto base_bindings = variant.base_bindings;
320 GLenum primitive_mode) { 364 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
321 if (target_program) { 365 if (program_type != ProgramType::Compute) {
322 return target_program->handle; 366 base_bindings.cbuf += STAGE_RESERVED_UBOS;
323 }
324 const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
325 target_program = TryLoadProgram(primitive_mode, base_bindings);
326 if (!target_program) {
327 target_program =
328 SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
329 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
330 } 367 }
368 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
369 base_bindings.sampler += static_cast<u32>(entries.samplers.size());
331 370
332 LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name); 371 return {program->handle, base_bindings};
333 372}
334 return target_program->handle;
335};
336 373
337CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode, 374CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
338 BaseBindings base_bindings) const { 375 const auto found = precompiled_programs.find(GetUsage(variant));
339 const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
340 if (found == precompiled_programs.end()) { 376 if (found == precompiled_programs.end()) {
341 return {}; 377 return {};
342 } 378 }
343 return found->second; 379 return found->second;
344} 380}
345 381
346ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, 382ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
347 BaseBindings base_bindings) const { 383 ShaderDiskCacheUsage usage;
348 return {unique_identifier, base_bindings, primitive_mode}; 384 usage.unique_identifier = unique_identifier;
385 usage.variant = variant;
386 return usage;
349} 387}
350 388
351ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 389ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -411,8 +449,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
411 } 449 }
412 if (!shader) { 450 if (!shader) {
413 shader = SpecializeShader(unspecialized.code, unspecialized.entries, 451 shader = SpecializeShader(unspecialized.code, unspecialized.entries,
414 unspecialized.program_type, usage.bindings, 452 unspecialized.program_type, usage.variant, true);
415 usage.primitive, true);
416 } 453 }
417 454
418 std::scoped_lock lock(mutex); 455 std::scoped_lock lock(mutex);
@@ -547,7 +584,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
547} 584}
548 585
549Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 586Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
550 if (!system.GPU().Maxwell3D().dirty_flags.shaders) { 587 if (!system.GPU().Maxwell3D().dirty.shaders) {
551 return last_shaders[static_cast<std::size_t>(program)]; 588 return last_shaders[static_cast<std::size_t>(program)];
552 } 589 }
553 590
@@ -564,28 +601,55 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
564 // No shader found - create a new one 601 // No shader found - create a new one
565 ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; 602 ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
566 ProgramCode program_code_b; 603 ProgramCode program_code_b;
567 if (program == Maxwell::ShaderProgram::VertexA) { 604 const bool is_program_a{program == Maxwell::ShaderProgram::VertexA};
605 if (is_program_a) {
568 const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; 606 const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
569 program_code_b = GetShaderCode(memory_manager, program_addr_b, 607 program_code_b = GetShaderCode(memory_manager, program_addr_b,
570 memory_manager.GetPointer(program_addr_b)); 608 memory_manager.GetPointer(program_addr_b));
571 } 609 }
572 610
573 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); 611 const auto unique_identifier =
574 const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; 612 GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
613 const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
614 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
615 host_ptr, unique_identifier};
616
575 const auto found = precompiled_shaders.find(unique_identifier); 617 const auto found = precompiled_shaders.find(unique_identifier);
576 if (found != precompiled_shaders.end()) { 618 if (found == precompiled_shaders.end()) {
577 // Create a shader from the cache 619 shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code),
578 shader = std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, 620 std::move(program_code_b));
579 precompiled_programs, found->second, host_ptr);
580 } else { 621 } else {
581 // Create a shader from guest memory 622 shader = CachedShader::CreateStageFromCache(params, program, found->second);
582 shader = std::make_shared<CachedShader>(
583 device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
584 std::move(program_code), std::move(program_code_b), host_ptr);
585 } 623 }
586 Register(shader); 624 Register(shader);
587 625
588 return last_shaders[static_cast<std::size_t>(program)] = shader; 626 return last_shaders[static_cast<std::size_t>(program)] = shader;
589} 627}
590 628
629Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
630 auto& memory_manager{system.GPU().MemoryManager()};
631 const auto host_ptr{memory_manager.GetPointer(code_addr)};
632 auto kernel = TryGet(host_ptr);
633 if (kernel) {
634 return kernel;
635 }
636
637 // No kernel found - create a new one
638 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
639 const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
640 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
641 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
642 host_ptr, unique_identifier};
643
644 const auto found = precompiled_shaders.find(unique_identifier);
645 if (found == precompiled_shaders.end()) {
646 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
647 } else {
648 kernel = CachedShader::CreateKernelFromCache(params, found->second);
649 }
650
651 Register(kernel);
652 return kernel;
653}
654
591} // namespace OpenGL 655} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 09bd0761d..de195cc5d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <atomic> 8#include <atomic>
9#include <bitset>
9#include <memory> 10#include <memory>
10#include <set> 11#include <set>
11#include <tuple> 12#include <tuple>
@@ -41,17 +42,29 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
41using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; 42using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
42using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; 43using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
43 44
45struct ShaderParameters {
46 ShaderDiskCacheOpenGL& disk_cache;
47 const PrecompiledPrograms& precompiled_programs;
48 const Device& device;
49 VAddr cpu_addr;
50 u8* host_ptr;
51 u64 unique_identifier;
52};
53
44class CachedShader final : public RasterizerCacheObject { 54class CachedShader final : public RasterizerCacheObject {
45public: 55public:
46 explicit CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier, 56 static Shader CreateStageFromMemory(const ShaderParameters& params,
47 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, 57 Maxwell::ShaderProgram program_type,
48 const PrecompiledPrograms& precompiled_programs, 58 ProgramCode&& program_code, ProgramCode&& program_code_b);
49 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); 59
60 static Shader CreateStageFromCache(const ShaderParameters& params,
61 Maxwell::ShaderProgram program_type,
62 GLShader::ProgramResult result);
63
64 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code);
50 65
51 explicit CachedShader(VAddr cpu_addr, u64 unique_identifier, 66 static Shader CreateKernelFromCache(const ShaderParameters& params,
52 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, 67 GLShader::ProgramResult result);
53 const PrecompiledPrograms& precompiled_programs,
54 GLShader::ProgramResult result, u8* host_ptr);
55 68
56 VAddr GetCpuAddr() const override { 69 VAddr GetCpuAddr() const override {
57 return cpu_addr; 70 return cpu_addr;
@@ -67,49 +80,27 @@ public:
67 } 80 }
68 81
69 /// Gets the GL program handle for the shader 82 /// Gets the GL program handle for the shader
70 std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode, 83 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
71 BaseBindings base_bindings);
72 84
73private: 85private:
74 // Geometry programs. These are needed because GLSL needs an input topology but it's not 86 explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
75 // declared by the hardware. Workaround this issue by generating a different shader per input 87 GLShader::ProgramResult result);
76 // topology class.
77 struct GeometryPrograms {
78 CachedProgram points;
79 CachedProgram lines;
80 CachedProgram lines_adjacency;
81 CachedProgram triangles;
82 CachedProgram triangles_adjacency;
83 };
84 88
85 GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings); 89 CachedProgram TryLoadProgram(const ProgramVariant& variant) const;
86 90
87 /// Generates a geometry shader or returns one that already exists. 91 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
88 GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
89 GLenum primitive_mode);
90 92
91 CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
92
93 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
94
95 u8* host_ptr{};
96 VAddr cpu_addr{}; 93 VAddr cpu_addr{};
97 u64 unique_identifier{}; 94 u64 unique_identifier{};
98 Maxwell::ShaderProgram program_type{}; 95 ProgramType program_type{};
99 ShaderDiskCacheOpenGL& disk_cache; 96 ShaderDiskCacheOpenGL& disk_cache;
100 const PrecompiledPrograms& precompiled_programs; 97 const PrecompiledPrograms& precompiled_programs;
101 98
102 std::size_t shader_length{};
103 GLShader::ShaderEntries entries; 99 GLShader::ShaderEntries entries;
104
105 std::string code; 100 std::string code;
101 std::size_t shader_length{};
106 102
107 std::unordered_map<BaseBindings, CachedProgram> programs; 103 std::unordered_map<ProgramVariant, CachedProgram> programs;
108 std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
109
110 std::unordered_map<u32, GLuint> cbuf_resource_cache;
111 std::unordered_map<u32, GLuint> gmem_resource_cache;
112 std::unordered_map<u32, GLint> uniform_cache;
113}; 104};
114 105
115class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 106class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -124,6 +115,9 @@ public:
124 /// Gets the current specified shader stage program 115 /// Gets the current specified shader stage program
125 Shader GetStageProgram(Maxwell::ShaderProgram program); 116 Shader GetStageProgram(Maxwell::ShaderProgram program);
126 117
118 /// Gets a compute kernel in the passed address
119 Shader GetComputeKernel(GPUVAddr code_addr);
120
127protected: 121protected:
128 // We do not have to flush this cache as things in it are never modified by us. 122 // We do not have to flush this cache as things in it are never modified by us.
129 void FlushObjectInner(const Shader& object) override {} 123 void FlushObjectInner(const Shader& object) override {}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 7dc2e0560..137b23740 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -14,6 +14,7 @@
14#include "common/alignment.h" 14#include "common/alignment.h"
15#include "common/assert.h" 15#include "common/assert.h"
16#include "common/common_types.h" 16#include "common/common_types.h"
17#include "common/logging/log.h"
17#include "video_core/engines/maxwell_3d.h" 18#include "video_core/engines/maxwell_3d.h"
18#include "video_core/renderer_opengl/gl_device.h" 19#include "video_core/renderer_opengl/gl_device.h"
19#include "video_core/renderer_opengl/gl_rasterizer.h" 20#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -36,19 +37,18 @@ using namespace std::string_literals;
36using namespace VideoCommon::Shader; 37using namespace VideoCommon::Shader;
37 38
38using Maxwell = Tegra::Engines::Maxwell3D::Regs; 39using Maxwell = Tegra::Engines::Maxwell3D::Regs;
39using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
40using Operation = const OperationNode&; 40using Operation = const OperationNode&;
41 41
42enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; 42enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
43 43
44struct TextureAoffi {}; 44struct TextureAoffi {};
45using TextureArgument = std::pair<Type, Node>; 45using TextureArgument = std::pair<Type, Node>;
46using TextureIR = std::variant<TextureAoffi, TextureArgument>; 46using TextureIR = std::variant<TextureAoffi, TextureArgument>;
47 47
48constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 48constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
49 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); 49 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
50 50
51class ShaderWriter { 51class ShaderWriter final {
52public: 52public:
53 void AddExpression(std::string_view text) { 53 void AddExpression(std::string_view text) {
54 DEBUG_ASSERT(scope >= 0); 54 DEBUG_ASSERT(scope >= 0);
@@ -93,9 +93,157 @@ private:
93 u32 temporary_index = 1; 93 u32 temporary_index = 1;
94}; 94};
95 95
96class Expression final {
97public:
98 Expression(std::string code, Type type) : code{std::move(code)}, type{type} {
99 ASSERT(type != Type::Void);
100 }
101 Expression() : type{Type::Void} {}
102
103 Type GetType() const {
104 return type;
105 }
106
107 std::string GetCode() const {
108 return code;
109 }
110
111 void CheckVoid() const {
112 ASSERT(type == Type::Void);
113 }
114
115 std::string As(Type type) const {
116 switch (type) {
117 case Type::Bool:
118 return AsBool();
119 case Type::Bool2:
120 return AsBool2();
121 case Type::Float:
122 return AsFloat();
123 case Type::Int:
124 return AsInt();
125 case Type::Uint:
126 return AsUint();
127 case Type::HalfFloat:
128 return AsHalfFloat();
129 default:
130 UNREACHABLE_MSG("Invalid type");
131 return code;
132 }
133 }
134
135 std::string AsBool() const {
136 switch (type) {
137 case Type::Bool:
138 return code;
139 default:
140 UNREACHABLE_MSG("Incompatible types");
141 return code;
142 }
143 }
144
145 std::string AsBool2() const {
146 switch (type) {
147 case Type::Bool2:
148 return code;
149 default:
150 UNREACHABLE_MSG("Incompatible types");
151 return code;
152 }
153 }
154
155 std::string AsFloat() const {
156 switch (type) {
157 case Type::Float:
158 return code;
159 case Type::Uint:
160 return fmt::format("utof({})", code);
161 case Type::Int:
162 return fmt::format("itof({})", code);
163 case Type::HalfFloat:
164 return fmt::format("utof(packHalf2x16({}))", code);
165 default:
166 UNREACHABLE_MSG("Incompatible types");
167 return code;
168 }
169 }
170
171 std::string AsInt() const {
172 switch (type) {
173 case Type::Float:
174 return fmt::format("ftoi({})", code);
175 case Type::Uint:
176 return fmt::format("int({})", code);
177 case Type::Int:
178 return code;
179 case Type::HalfFloat:
180 return fmt::format("int(packHalf2x16({}))", code);
181 default:
182 UNREACHABLE_MSG("Incompatible types");
183 return code;
184 }
185 }
186
187 std::string AsUint() const {
188 switch (type) {
189 case Type::Float:
190 return fmt::format("ftou({})", code);
191 case Type::Uint:
192 return code;
193 case Type::Int:
194 return fmt::format("uint({})", code);
195 case Type::HalfFloat:
196 return fmt::format("packHalf2x16({})", code);
197 default:
198 UNREACHABLE_MSG("Incompatible types");
199 return code;
200 }
201 }
202
203 std::string AsHalfFloat() const {
204 switch (type) {
205 case Type::Float:
206 return fmt::format("unpackHalf2x16(ftou({}))", code);
207 case Type::Uint:
208 return fmt::format("unpackHalf2x16({})", code);
209 case Type::Int:
210 return fmt::format("unpackHalf2x16(int({}))", code);
211 case Type::HalfFloat:
212 return code;
213 default:
214 UNREACHABLE_MSG("Incompatible types");
215 return code;
216 }
217 }
218
219private:
220 std::string code;
221 Type type{};
222};
223
224constexpr const char* GetTypeString(Type type) {
225 switch (type) {
226 case Type::Bool:
227 return "bool";
228 case Type::Bool2:
229 return "bvec2";
230 case Type::Float:
231 return "float";
232 case Type::Int:
233 return "int";
234 case Type::Uint:
235 return "uint";
236 case Type::HalfFloat:
237 return "vec2";
238 default:
239 UNREACHABLE_MSG("Invalid type");
240 return "<invalid type>";
241 }
242}
243
96/// Generates code to use for a swizzle operation. 244/// Generates code to use for a swizzle operation.
97constexpr const char* GetSwizzle(u32 element) { 245constexpr const char* GetSwizzle(u32 element) {
98 constexpr std::array<const char*, 4> swizzle = {".x", ".y", ".z", ".w"}; 246 constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
99 return swizzle.at(element); 247 return swizzle.at(element);
100} 248}
101 249
@@ -134,8 +282,8 @@ constexpr bool IsGenericAttribute(Attribute::Index index) {
134 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; 282 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
135} 283}
136 284
137constexpr Attribute::Index ToGenericAttribute(u32 value) { 285constexpr Attribute::Index ToGenericAttribute(u64 value) {
138 return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0)); 286 return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
139} 287}
140 288
141u32 GetGenericAttributeIndex(Attribute::Index index) { 289u32 GetGenericAttributeIndex(Attribute::Index index) {
@@ -161,9 +309,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
161 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); 309 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
162} 310}
163 311
312constexpr bool IsVertexShader(ProgramType stage) {
313 return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
314}
315
164class GLSLDecompiler final { 316class GLSLDecompiler final {
165public: 317public:
166 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, 318 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage,
167 std::string suffix) 319 std::string suffix)
168 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} 320 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
169 321
@@ -180,20 +332,23 @@ public:
180 DeclareGlobalMemory(); 332 DeclareGlobalMemory();
181 DeclareSamplers(); 333 DeclareSamplers();
182 DeclarePhysicalAttributeReader(); 334 DeclarePhysicalAttributeReader();
335 DeclareImages();
183 336
184 code.AddLine("void execute_{}() {{", suffix); 337 code.AddLine("void execute_{}() {{", suffix);
185 ++code.scope; 338 ++code.scope;
186 339
187 // VM's program counter 340 // VM's program counter
188 const auto first_address = ir.GetBasicBlocks().begin()->first; 341 const auto first_address = ir.GetBasicBlocks().begin()->first;
189 code.AddLine("uint jmp_to = {}u;", first_address); 342 code.AddLine("uint jmp_to = {}U;", first_address);
190 343
191 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems 344 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
192 // unlikely that shaders will use 20 nested SSYs and PBKs. 345 // unlikely that shaders will use 20 nested SSYs and PBKs.
193 constexpr u32 FLOW_STACK_SIZE = 20; 346 if (!ir.IsFlowStackDisabled()) {
194 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { 347 constexpr u32 FLOW_STACK_SIZE = 20;
195 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); 348 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
196 code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); 349 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
350 code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
351 }
197 } 352 }
198 353
199 code.AddLine("while (true) {{"); 354 code.AddLine("while (true) {{");
@@ -203,7 +358,7 @@ public:
203 358
204 for (const auto& pair : ir.GetBasicBlocks()) { 359 for (const auto& pair : ir.GetBasicBlocks()) {
205 const auto [address, bb] = pair; 360 const auto [address, bb] = pair;
206 code.AddLine("case 0x{:x}u: {{", address); 361 code.AddLine("case 0x{:X}U: {{", address);
207 ++code.scope; 362 ++code.scope;
208 363
209 VisitBlock(bb); 364 VisitBlock(bb);
@@ -234,30 +389,30 @@ public:
234 for (const auto& sampler : ir.GetSamplers()) { 389 for (const auto& sampler : ir.GetSamplers()) {
235 entries.samplers.emplace_back(sampler); 390 entries.samplers.emplace_back(sampler);
236 } 391 }
237 for (const auto& gmem_pair : ir.GetGlobalMemory()) { 392 for (const auto& [offset, image] : ir.GetImages()) {
238 const auto& [base, usage] = gmem_pair; 393 entries.images.emplace_back(image);
394 }
395 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
239 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, 396 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
240 usage.is_read, usage.is_written); 397 usage.is_read, usage.is_written);
241 } 398 }
242 entries.clip_distances = ir.GetClipDistances(); 399 entries.clip_distances = ir.GetClipDistances();
400 entries.shader_viewport_layer_array =
401 IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex());
243 entries.shader_length = ir.GetLength(); 402 entries.shader_length = ir.GetLength();
244 return entries; 403 return entries;
245 } 404 }
246 405
247private: 406private:
248 using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
249 using OperationDecompilersArray =
250 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
251
252 void DeclareVertex() { 407 void DeclareVertex() {
253 if (stage != ShaderStage::Vertex) 408 if (!IsVertexShader(stage))
254 return; 409 return;
255 410
256 DeclareVertexRedeclarations(); 411 DeclareVertexRedeclarations();
257 } 412 }
258 413
259 void DeclareGeometry() { 414 void DeclareGeometry() {
260 if (stage != ShaderStage::Geometry) { 415 if (stage != ProgramType::Geometry) {
261 return; 416 return;
262 } 417 }
263 418
@@ -276,21 +431,34 @@ private:
276 } 431 }
277 432
278 void DeclareVertexRedeclarations() { 433 void DeclareVertexRedeclarations() {
279 bool clip_distances_declared = false;
280
281 code.AddLine("out gl_PerVertex {{"); 434 code.AddLine("out gl_PerVertex {{");
282 ++code.scope; 435 ++code.scope;
283 436
284 code.AddLine("vec4 gl_Position;"); 437 code.AddLine("vec4 gl_Position;");
285 438
286 for (const auto o : ir.GetOutputAttributes()) { 439 for (const auto attribute : ir.GetOutputAttributes()) {
287 if (o == Attribute::Index::PointSize) 440 if (attribute == Attribute::Index::ClipDistances0123 ||
288 code.AddLine("float gl_PointSize;"); 441 attribute == Attribute::Index::ClipDistances4567) {
289 if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
290 o == Attribute::Index::ClipDistances4567)) {
291 code.AddLine("float gl_ClipDistance[];"); 442 code.AddLine("float gl_ClipDistance[];");
292 clip_distances_declared = true; 443 break;
444 }
445 }
446 if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) {
447 if (ir.UsesLayer()) {
448 code.AddLine("int gl_Layer;");
449 }
450 if (ir.UsesViewportIndex()) {
451 code.AddLine("int gl_ViewportIndex;");
293 } 452 }
453 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) &&
454 !device.HasVertexViewportLayer()) {
455 LOG_ERROR(
456 Render_OpenGL,
457 "GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
458 }
459
460 if (ir.UsesPointSize()) {
461 code.AddLine("float gl_PointSize;");
294 } 462 }
295 463
296 --code.scope; 464 --code.scope;
@@ -301,7 +469,7 @@ private:
301 void DeclareRegisters() { 469 void DeclareRegisters() {
302 const auto& registers = ir.GetRegisters(); 470 const auto& registers = ir.GetRegisters();
303 for (const u32 gpr : registers) { 471 for (const u32 gpr : registers) {
304 code.AddLine("float {} = 0;", GetRegister(gpr)); 472 code.AddLine("float {} = 0.0f;", GetRegister(gpr));
305 } 473 }
306 if (!registers.empty()) { 474 if (!registers.empty()) {
307 code.AddNewLine(); 475 code.AddNewLine();
@@ -319,11 +487,16 @@ private:
319 } 487 }
320 488
321 void DeclareLocalMemory() { 489 void DeclareLocalMemory() {
322 if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { 490 // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
323 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; 491 // specialization time.
324 code.AddLine("float {}[{}];", GetLocalMemory(), element_count); 492 const u64 local_memory_size =
325 code.AddNewLine(); 493 stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
494 if (local_memory_size == 0) {
495 return;
326 } 496 }
497 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
498 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
499 code.AddNewLine();
327 } 500 }
328 501
329 void DeclareInternalFlags() { 502 void DeclareInternalFlags() {
@@ -345,8 +518,6 @@ private:
345 return "noperspective "; 518 return "noperspective ";
346 default: 519 default:
347 case AttributeUse::Unused: 520 case AttributeUse::Unused:
348 UNREACHABLE_MSG("Unused attribute being fetched");
349 return {};
350 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute)); 521 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
351 return {}; 522 return {};
352 } 523 }
@@ -377,12 +548,12 @@ private:
377 const u32 location{GetGenericAttributeIndex(index)}; 548 const u32 location{GetGenericAttributeIndex(index)};
378 549
379 std::string name{GetInputAttribute(index)}; 550 std::string name{GetInputAttribute(index)};
380 if (stage == ShaderStage::Geometry) { 551 if (stage == ProgramType::Geometry) {
381 name = "gs_" + name + "[]"; 552 name = "gs_" + name + "[]";
382 } 553 }
383 554
384 std::string suffix; 555 std::string suffix;
385 if (stage == ShaderStage::Fragment) { 556 if (stage == ProgramType::Fragment) {
386 const auto input_mode{header.ps.GetAttributeUse(location)}; 557 const auto input_mode{header.ps.GetAttributeUse(location)};
387 if (skip_unused && input_mode == AttributeUse::Unused) { 558 if (skip_unused && input_mode == AttributeUse::Unused) {
388 return; 559 return;
@@ -394,7 +565,7 @@ private:
394 } 565 }
395 566
396 void DeclareOutputAttributes() { 567 void DeclareOutputAttributes() {
397 if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { 568 if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) {
398 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { 569 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
399 DeclareOutputAttribute(ToGenericAttribute(i)); 570 DeclareOutputAttribute(ToGenericAttribute(i));
400 } 571 }
@@ -423,7 +594,7 @@ private:
423 const auto [index, size] = entry; 594 const auto [index, size] = entry;
424 code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, 595 code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index,
425 GetConstBufferBlock(index)); 596 GetConstBufferBlock(index));
426 code.AddLine(" vec4 {}[MAX_CONSTBUFFER_ELEMENTS];", GetConstBuffer(index)); 597 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
427 code.AddLine("}};"); 598 code.AddLine("}};");
428 code.AddNewLine(); 599 code.AddNewLine();
429 } 600 }
@@ -444,7 +615,7 @@ private:
444 615
445 code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", 616 code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{",
446 base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); 617 base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base));
447 code.AddLine(" float {}[];", GetGlobalMemory(base)); 618 code.AddLine(" uint {}[];", GetGlobalMemory(base));
448 code.AddLine("}};"); 619 code.AddLine("}};");
449 code.AddNewLine(); 620 code.AddNewLine();
450 } 621 }
@@ -453,9 +624,13 @@ private:
453 void DeclareSamplers() { 624 void DeclareSamplers() {
454 const auto& samplers = ir.GetSamplers(); 625 const auto& samplers = ir.GetSamplers();
455 for (const auto& sampler : samplers) { 626 for (const auto& sampler : samplers) {
456 std::string sampler_type = [&sampler] { 627 const std::string name{GetSampler(sampler)};
628 const std::string description{"layout (binding = SAMPLER_BINDING_" +
629 std::to_string(sampler.GetIndex()) + ") uniform"};
630 std::string sampler_type = [&]() {
457 switch (sampler.GetType()) { 631 switch (sampler.GetType()) {
458 case Tegra::Shader::TextureType::Texture1D: 632 case Tegra::Shader::TextureType::Texture1D:
633 // Special cased, read below.
459 return "sampler1D"; 634 return "sampler1D";
460 case Tegra::Shader::TextureType::Texture2D: 635 case Tegra::Shader::TextureType::Texture2D:
461 return "sampler2D"; 636 return "sampler2D";
@@ -475,8 +650,19 @@ private:
475 sampler_type += "Shadow"; 650 sampler_type += "Shadow";
476 } 651 }
477 652
478 code.AddLine("layout (binding = SAMPLER_BINDING_{}) uniform {} {};", sampler.GetIndex(), 653 if (sampler.GetType() == Tegra::Shader::TextureType::Texture1D) {
479 sampler_type, GetSampler(sampler)); 654 // 1D textures can be aliased to texture buffers, hide the declarations behind a
655 // preprocessor flag and use one or the other from the GPU state. This has to be
656 // done because shaders don't have enough information to determine the texture type.
657 EmitIfdefIsBuffer(sampler);
658 code.AddLine("{} samplerBuffer {};", description, name);
659 code.AddLine("#else");
660 code.AddLine("{} {} {};", description, sampler_type, name);
661 code.AddLine("#endif");
662 } else {
663 // The other texture types (2D, 3D and cubes) don't have this issue.
664 code.AddLine("{} {} {};", description, sampler_type, name);
665 }
480 } 666 }
481 if (!samplers.empty()) { 667 if (!samplers.empty()) {
482 code.AddNewLine(); 668 code.AddNewLine();
@@ -487,7 +673,7 @@ private:
487 if (!ir.HasPhysicalAttributes()) { 673 if (!ir.HasPhysicalAttributes()) {
488 return; 674 return;
489 } 675 }
490 code.AddLine("float readPhysicalAttribute(uint physical_address) {{"); 676 code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
491 ++code.scope; 677 ++code.scope;
492 code.AddLine("switch (physical_address) {{"); 678 code.AddLine("switch (physical_address) {{");
493 679
@@ -496,15 +682,16 @@ private:
496 for (u32 index = 0; index < num_attributes; ++index) { 682 for (u32 index = 0; index < num_attributes; ++index) {
497 const auto attribute{ToGenericAttribute(index)}; 683 const auto attribute{ToGenericAttribute(index)};
498 for (u32 element = 0; element < 4; ++element) { 684 for (u32 element = 0; element < 4; ++element) {
499 constexpr u32 generic_base{0x80}; 685 constexpr u32 generic_base = 0x80;
500 constexpr u32 generic_stride{16}; 686 constexpr u32 generic_stride = 16;
501 constexpr u32 element_stride{4}; 687 constexpr u32 element_stride = 4;
502 const u32 address{generic_base + index * generic_stride + element * element_stride}; 688 const u32 address{generic_base + index * generic_stride + element * element_stride};
503 689
504 const bool declared{stage != ShaderStage::Fragment || 690 const bool declared = stage != ProgramType::Fragment ||
505 header.ps.GetAttributeUse(index) != AttributeUse::Unused}; 691 header.ps.GetAttributeUse(index) != AttributeUse::Unused;
506 const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; 692 const std::string value =
507 code.AddLine("case 0x{:x}: return {};", address, value); 693 declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
694 code.AddLine("case 0x{:X}U: return {};", address, value);
508 } 695 }
509 } 696 }
510 697
@@ -516,15 +703,68 @@ private:
516 code.AddNewLine(); 703 code.AddNewLine();
517 } 704 }
518 705
706 void DeclareImages() {
707 const auto& images{ir.GetImages()};
708 for (const auto& [offset, image] : images) {
709 const char* image_type = [&] {
710 switch (image.GetType()) {
711 case Tegra::Shader::ImageType::Texture1D:
712 return "image1D";
713 case Tegra::Shader::ImageType::TextureBuffer:
714 return "imageBuffer";
715 case Tegra::Shader::ImageType::Texture1DArray:
716 return "image1DArray";
717 case Tegra::Shader::ImageType::Texture2D:
718 return "image2D";
719 case Tegra::Shader::ImageType::Texture2DArray:
720 return "image2DArray";
721 case Tegra::Shader::ImageType::Texture3D:
722 return "image3D";
723 default:
724 UNREACHABLE();
725 return "image1D";
726 }
727 }();
728
729 const auto [type_prefix, format] = [&]() -> std::pair<const char*, const char*> {
730 if (!image.IsSizeKnown()) {
731 return {"", ""};
732 }
733 switch (image.GetSize()) {
734 case Tegra::Shader::ImageAtomicSize::U32:
735 return {"u", "r32ui, "};
736 case Tegra::Shader::ImageAtomicSize::S32:
737 return {"i", "r32i, "};
738 default:
739 UNIMPLEMENTED_MSG("Unimplemented atomic size={}",
740 static_cast<u32>(image.GetSize()));
741 return {"", ""};
742 }
743 }();
744
745 std::string qualifier = "coherent volatile";
746 if (image.IsRead() && !image.IsWritten()) {
747 qualifier += " readonly";
748 } else if (image.IsWritten() && !image.IsRead()) {
749 qualifier += " writeonly";
750 }
751
752 code.AddLine("layout (binding = IMAGE_BINDING_{}) {} uniform "
753 "{} {};",
754 image.GetIndex(), qualifier, image_type, GetImage(image));
755 }
756 if (!images.empty()) {
757 code.AddNewLine();
758 }
759 }
760
519 void VisitBlock(const NodeBlock& bb) { 761 void VisitBlock(const NodeBlock& bb) {
520 for (const auto& node : bb) { 762 for (const auto& node : bb) {
521 if (const std::string expr = Visit(node); !expr.empty()) { 763 Visit(node).CheckVoid();
522 code.AddLine(expr);
523 }
524 } 764 }
525 } 765 }
526 766
527 std::string Visit(const Node& node) { 767 Expression Visit(const Node& node) {
528 if (const auto operation = std::get_if<OperationNode>(&*node)) { 768 if (const auto operation = std::get_if<OperationNode>(&*node)) {
529 const auto operation_index = static_cast<std::size_t>(operation->GetCode()); 769 const auto operation_index = static_cast<std::size_t>(operation->GetCode());
530 if (operation_index >= operation_decompilers.size()) { 770 if (operation_index >= operation_decompilers.size()) {
@@ -542,18 +782,18 @@ private:
542 if (const auto gpr = std::get_if<GprNode>(&*node)) { 782 if (const auto gpr = std::get_if<GprNode>(&*node)) {
543 const u32 index = gpr->GetIndex(); 783 const u32 index = gpr->GetIndex();
544 if (index == Register::ZeroIndex) { 784 if (index == Register::ZeroIndex) {
545 return "0"; 785 return {"0U", Type::Uint};
546 } 786 }
547 return GetRegister(index); 787 return {GetRegister(index), Type::Float};
548 } 788 }
549 789
550 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { 790 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
551 const u32 value = immediate->GetValue(); 791 const u32 value = immediate->GetValue();
552 if (value < 10) { 792 if (value < 10) {
553 // For eyecandy avoid using hex numbers on single digits 793 // For eyecandy avoid using hex numbers on single digits
554 return fmt::format("utof({}u)", immediate->GetValue()); 794 return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
555 } 795 }
556 return fmt::format("utof(0x{:x}u)", immediate->GetValue()); 796 return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
557 } 797 }
558 798
559 if (const auto predicate = std::get_if<PredicateNode>(&*node)) { 799 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
@@ -568,17 +808,18 @@ private:
568 } 808 }
569 }(); 809 }();
570 if (predicate->IsNegated()) { 810 if (predicate->IsNegated()) {
571 return fmt::format("!({})", value); 811 return {fmt::format("!({})", value), Type::Bool};
572 } 812 }
573 return value; 813 return {value, Type::Bool};
574 } 814 }
575 815
576 if (const auto abuf = std::get_if<AbufNode>(&*node)) { 816 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
577 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, 817 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,
578 "Physical attributes in geometry shaders are not implemented"); 818 "Physical attributes in geometry shaders are not implemented");
579 if (abuf->IsPhysicalBuffer()) { 819 if (abuf->IsPhysicalBuffer()) {
580 return fmt::format("readPhysicalAttribute(ftou({}))", 820 return {fmt::format("ReadPhysicalAttribute({})",
581 Visit(abuf->GetPhysicalAddress())); 821 Visit(abuf->GetPhysicalAddress()).AsUint()),
822 Type::Float};
582 } 823 }
583 return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); 824 return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
584 } 825 }
@@ -589,56 +830,64 @@ private:
589 // Direct access 830 // Direct access
590 const u32 offset_imm = immediate->GetValue(); 831 const u32 offset_imm = immediate->GetValue();
591 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); 832 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
592 return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), 833 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
593 offset_imm / (4 * 4), (offset_imm / 4) % 4); 834 offset_imm / (4 * 4), (offset_imm / 4) % 4),
835 Type::Uint};
594 } 836 }
595 837
596 if (std::holds_alternative<OperationNode>(*offset)) { 838 if (std::holds_alternative<OperationNode>(*offset)) {
597 // Indirect access 839 // Indirect access
598 const std::string final_offset = code.GenerateTemporary(); 840 const std::string final_offset = code.GenerateTemporary();
599 code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset)); 841 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
600 842
601 if (!device.HasComponentIndexingBug()) { 843 if (!device.HasComponentIndexingBug()) {
602 return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), 844 return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
603 final_offset, final_offset); 845 final_offset, final_offset),
846 Type::Uint};
604 } 847 }
605 848
606 // AMD's proprietary GLSL compiler emits ill code for variable component access. 849 // AMD's proprietary GLSL compiler emits ill code for variable component access.
607 // To bypass this driver bug generate 4 ifs, one per each component. 850 // To bypass this driver bug generate 4 ifs, one per each component.
608 const std::string pack = code.GenerateTemporary(); 851 const std::string pack = code.GenerateTemporary();
609 code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), 852 code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
610 final_offset); 853 final_offset);
611 854
612 const std::string result = code.GenerateTemporary(); 855 const std::string result = code.GenerateTemporary();
613 code.AddLine("float {};", result); 856 code.AddLine("uint {};", result);
614 for (u32 swizzle = 0; swizzle < 4; ++swizzle) { 857 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
615 code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, 858 code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
616 pack, GetSwizzle(swizzle)); 859 pack, GetSwizzle(swizzle));
617 } 860 }
618 return result; 861 return {result, Type::Uint};
619 } 862 }
620 863
621 UNREACHABLE_MSG("Unmanaged offset node type"); 864 UNREACHABLE_MSG("Unmanaged offset node type");
622 } 865 }
623 866
624 if (const auto gmem = std::get_if<GmemNode>(&*node)) { 867 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
625 const std::string real = Visit(gmem->GetRealAddress()); 868 const std::string real = Visit(gmem->GetRealAddress()).AsUint();
626 const std::string base = Visit(gmem->GetBaseAddress()); 869 const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
627 const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); 870 const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
628 return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); 871 return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
872 Type::Uint};
629 } 873 }
630 874
631 if (const auto lmem = std::get_if<LmemNode>(&*node)) { 875 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
632 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); 876 if (stage == ProgramType::Compute) {
877 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
878 }
879 return {
880 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
881 Type::Uint};
633 } 882 }
634 883
635 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { 884 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
636 return GetInternalFlag(internal_flag->GetFlag()); 885 return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
637 } 886 }
638 887
639 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { 888 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
640 // It's invalid to call conditional on nested nodes, use an operation instead 889 // It's invalid to call conditional on nested nodes, use an operation instead
641 code.AddLine("if ({}) {{", Visit(conditional->GetCondition())); 890 code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
642 ++code.scope; 891 ++code.scope;
643 892
644 VisitBlock(conditional->GetCode()); 893 VisitBlock(conditional->GetCode());
@@ -649,20 +898,21 @@ private:
649 } 898 }
650 899
651 if (const auto comment = std::get_if<CommentNode>(&*node)) { 900 if (const auto comment = std::get_if<CommentNode>(&*node)) {
652 return "// " + comment->GetText(); 901 code.AddLine("// " + comment->GetText());
902 return {};
653 } 903 }
654 904
655 UNREACHABLE(); 905 UNREACHABLE();
656 return {}; 906 return {};
657 } 907 }
658 908
659 std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { 909 Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
660 const auto GeometryPass = [&](std::string_view name) { 910 const auto GeometryPass = [&](std::string_view name) {
661 if (stage == ShaderStage::Geometry && buffer) { 911 if (stage == ProgramType::Geometry && buffer) {
662 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games 912 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
663 // set an 0x80000000 index for those and the shader fails to build. Find out why 913 // set an 0x80000000 index for those and the shader fails to build. Find out why
664 // this happens and what's its intent. 914 // this happens and what's its intent.
665 return fmt::format("gs_{}[ftou({}) % MAX_VERTEX_INPUT]", name, Visit(buffer)); 915 return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint());
666 } 916 }
667 return std::string(name); 917 return std::string(name);
668 }; 918 };
@@ -670,72 +920,79 @@ private:
670 switch (attribute) { 920 switch (attribute) {
671 case Attribute::Index::Position: 921 case Attribute::Index::Position:
672 switch (stage) { 922 switch (stage) {
673 case ShaderStage::Geometry: 923 case ProgramType::Geometry:
674 return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), 924 return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
675 GetSwizzle(element)); 925 GetSwizzle(element)),
676 case ShaderStage::Fragment: 926 Type::Float};
677 return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); 927 case ProgramType::Fragment:
928 return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)),
929 Type::Float};
678 default: 930 default:
679 UNREACHABLE(); 931 UNREACHABLE();
680 } 932 }
681 case Attribute::Index::PointCoord: 933 case Attribute::Index::PointCoord:
682 switch (element) { 934 switch (element) {
683 case 0: 935 case 0:
684 return "gl_PointCoord.x"; 936 return {"gl_PointCoord.x", Type::Float};
685 case 1: 937 case 1:
686 return "gl_PointCoord.y"; 938 return {"gl_PointCoord.y", Type::Float};
687 case 2: 939 case 2:
688 case 3: 940 case 3:
689 return "0"; 941 return {"0.0f", Type::Float};
690 } 942 }
691 UNREACHABLE(); 943 UNREACHABLE();
692 return "0"; 944 return {"0", Type::Int};
693 case Attribute::Index::TessCoordInstanceIDVertexID: 945 case Attribute::Index::TessCoordInstanceIDVertexID:
694 // TODO(Subv): Find out what the values are for the first two elements when inside a 946 // TODO(Subv): Find out what the values are for the first two elements when inside a
695 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 947 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
696 // shader. 948 // shader.
697 ASSERT(stage == ShaderStage::Vertex); 949 ASSERT(IsVertexShader(stage));
698 switch (element) { 950 switch (element) {
699 case 2: 951 case 2:
700 // Config pack's first value is instance_id. 952 // Config pack's first value is instance_id.
701 return "uintBitsToFloat(config_pack[0])"; 953 return {"config_pack[0]", Type::Uint};
702 case 3: 954 case 3:
703 return "uintBitsToFloat(gl_VertexID)"; 955 return {"gl_VertexID", Type::Int};
704 } 956 }
705 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); 957 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
706 return "0"; 958 return {"0", Type::Int};
707 case Attribute::Index::FrontFacing: 959 case Attribute::Index::FrontFacing:
708 // TODO(Subv): Find out what the values are for the other elements. 960 // TODO(Subv): Find out what the values are for the other elements.
709 ASSERT(stage == ShaderStage::Fragment); 961 ASSERT(stage == ProgramType::Fragment);
710 switch (element) { 962 switch (element) {
711 case 3: 963 case 3:
712 return "itof(gl_FrontFacing ? -1 : 0)"; 964 return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
713 } 965 }
714 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); 966 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
715 return "0"; 967 return {"0", Type::Int};
716 default: 968 default:
717 if (IsGenericAttribute(attribute)) { 969 if (IsGenericAttribute(attribute)) {
718 return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element); 970 return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
971 Type::Float};
719 } 972 }
720 break; 973 break;
721 } 974 }
722 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); 975 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
723 return "0"; 976 return {"0", Type::Int};
724 } 977 }
725 978
726 std::string ApplyPrecise(Operation operation, const std::string& value) { 979 Expression ApplyPrecise(Operation operation, std::string value, Type type) {
727 if (!IsPrecise(operation)) { 980 if (!IsPrecise(operation)) {
728 return value; 981 return {std::move(value), type};
729 } 982 }
730 // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders 983 // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to
731 const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; 984 // be found in fragment shaders, so we disable precise there. There are vertex shaders that
985 // also fail to build but nobody seems to care about those.
986 // Note: Only bugged drivers will skip precise.
987 const bool disable_precise = device.HasPreciseBug() && stage == ProgramType::Fragment;
732 988
733 const std::string temporary = code.GenerateTemporary(); 989 std::string temporary = code.GenerateTemporary();
734 code.AddLine("{}float {} = {};", precise, temporary, value); 990 code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
735 return temporary; 991 temporary, value);
992 return {std::move(temporary), type};
736 } 993 }
737 994
738 std::string VisitOperand(Operation operation, std::size_t operand_index) { 995 Expression VisitOperand(Operation operation, std::size_t operand_index) {
739 const auto& operand = operation[operand_index]; 996 const auto& operand = operation[operand_index];
740 const bool parent_precise = IsPrecise(operation); 997 const bool parent_precise = IsPrecise(operation);
741 const bool child_precise = IsPrecise(operand); 998 const bool child_precise = IsPrecise(operand);
@@ -744,102 +1001,98 @@ private:
744 return Visit(operand); 1001 return Visit(operand);
745 } 1002 }
746 1003
747 const std::string temporary = code.GenerateTemporary(); 1004 Expression value = Visit(operand);
748 code.AddLine("float {} = {};", temporary, Visit(operand)); 1005 std::string temporary = code.GenerateTemporary();
749 return temporary; 1006 code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
1007 return {std::move(temporary), value.GetType()};
750 } 1008 }
751 1009
752 std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { 1010 Expression GetOutputAttribute(const AbufNode* abuf) {
753 return CastOperand(VisitOperand(operation, operand_index), type); 1011 switch (const auto attribute = abuf->GetIndex()) {
754 } 1012 case Attribute::Index::Position:
755 1013 return {"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float};
756 std::string CastOperand(const std::string& value, Type type) const { 1014 case Attribute::Index::LayerViewportPointSize:
757 switch (type) { 1015 switch (abuf->GetElement()) {
758 case Type::Bool: 1016 case 0:
759 case Type::Bool2: 1017 UNIMPLEMENTED();
760 case Type::Float: 1018 return {};
761 return value; 1019 case 1:
762 case Type::Int: 1020 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
763 return fmt::format("ftoi({})", value); 1021 return {};
764 case Type::Uint: 1022 }
765 return fmt::format("ftou({})", value); 1023 return {"gl_Layer", Type::Int};
766 case Type::HalfFloat: 1024 case 2:
767 return fmt::format("toHalf2({})", value); 1025 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
768 } 1026 return {};
769 UNREACHABLE(); 1027 }
770 return value; 1028 return {"gl_ViewportIndex", Type::Int};
771 } 1029 case 3:
772 1030 UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
773 std::string BitwiseCastResult(const std::string& value, Type type, 1031 return {"gl_PointSize", Type::Float};
774 bool needs_parenthesis = false) {
775 switch (type) {
776 case Type::Bool:
777 case Type::Bool2:
778 case Type::Float:
779 if (needs_parenthesis) {
780 return fmt::format("({})", value);
781 } 1032 }
782 return value; 1033 return {};
783 case Type::Int: 1034 case Attribute::Index::ClipDistances0123:
784 return fmt::format("itof({})", value); 1035 return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float};
785 case Type::Uint: 1036 case Attribute::Index::ClipDistances4567:
786 return fmt::format("utof({})", value); 1037 return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float};
787 case Type::HalfFloat: 1038 default:
788 return fmt::format("fromHalf2({})", value); 1039 if (IsGenericAttribute(attribute)) {
1040 return {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()),
1041 Type::Float};
1042 }
1043 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
1044 return {};
789 } 1045 }
790 UNREACHABLE();
791 return value;
792 } 1046 }
793 1047
794 std::string GenerateUnary(Operation operation, const std::string& func, Type result_type, 1048 Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
795 Type type_a, bool needs_parenthesis = true) { 1049 Type type_a) {
796 const std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0, type_a)); 1050 std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
797 1051 return ApplyPrecise(operation, std::move(op_str), result_type);
798 return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type, needs_parenthesis));
799 } 1052 }
800 1053
801 std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type, 1054 Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
802 Type type_a, Type type_b) { 1055 Type type_a, Type type_b) {
803 const std::string op_a = VisitOperand(operation, 0, type_a); 1056 const std::string op_a = VisitOperand(operation, 0).As(type_a);
804 const std::string op_b = VisitOperand(operation, 1, type_b); 1057 const std::string op_b = VisitOperand(operation, 1).As(type_b);
805 const std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); 1058 std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
806 1059
807 return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); 1060 return ApplyPrecise(operation, std::move(op_str), result_type);
808 } 1061 }
809 1062
810 std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type, 1063 Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
811 Type type_a, Type type_b) { 1064 Type type_a, Type type_b) {
812 const std::string op_a = VisitOperand(operation, 0, type_a); 1065 const std::string op_a = VisitOperand(operation, 0).As(type_a);
813 const std::string op_b = VisitOperand(operation, 1, type_b); 1066 const std::string op_b = VisitOperand(operation, 1).As(type_b);
814 const std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); 1067 std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
815 1068
816 return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); 1069 return ApplyPrecise(operation, std::move(op_str), result_type);
817 } 1070 }
818 1071
819 std::string GenerateTernary(Operation operation, const std::string& func, Type result_type, 1072 Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
820 Type type_a, Type type_b, Type type_c) { 1073 Type type_a, Type type_b, Type type_c) {
821 const std::string op_a = VisitOperand(operation, 0, type_a); 1074 const std::string op_a = VisitOperand(operation, 0).As(type_a);
822 const std::string op_b = VisitOperand(operation, 1, type_b); 1075 const std::string op_b = VisitOperand(operation, 1).As(type_b);
823 const std::string op_c = VisitOperand(operation, 2, type_c); 1076 const std::string op_c = VisitOperand(operation, 2).As(type_c);
824 const std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); 1077 std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
825 1078
826 return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); 1079 return ApplyPrecise(operation, std::move(op_str), result_type);
827 } 1080 }
828 1081
829 std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type, 1082 Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
830 Type type_a, Type type_b, Type type_c, Type type_d) { 1083 Type type_a, Type type_b, Type type_c, Type type_d) {
831 const std::string op_a = VisitOperand(operation, 0, type_a); 1084 const std::string op_a = VisitOperand(operation, 0).As(type_a);
832 const std::string op_b = VisitOperand(operation, 1, type_b); 1085 const std::string op_b = VisitOperand(operation, 1).As(type_b);
833 const std::string op_c = VisitOperand(operation, 2, type_c); 1086 const std::string op_c = VisitOperand(operation, 2).As(type_c);
834 const std::string op_d = VisitOperand(operation, 3, type_d); 1087 const std::string op_d = VisitOperand(operation, 3).As(type_d);
835 const std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); 1088 std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
836 1089
837 return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); 1090 return ApplyPrecise(operation, std::move(op_str), result_type);
838 } 1091 }
839 1092
840 std::string GenerateTexture(Operation operation, const std::string& function_suffix, 1093 std::string GenerateTexture(Operation operation, const std::string& function_suffix,
841 const std::vector<TextureIR>& extras) { 1094 const std::vector<TextureIR>& extras) {
842 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; 1095 constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
843 1096
844 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1097 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
845 ASSERT(meta); 1098 ASSERT(meta);
@@ -856,17 +1109,17 @@ private:
856 expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); 1109 expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
857 expr += '('; 1110 expr += '(';
858 for (std::size_t i = 0; i < count; ++i) { 1111 for (std::size_t i = 0; i < count; ++i) {
859 expr += Visit(operation[i]); 1112 expr += Visit(operation[i]).AsFloat();
860 1113
861 const std::size_t next = i + 1; 1114 const std::size_t next = i + 1;
862 if (next < count) 1115 if (next < count)
863 expr += ", "; 1116 expr += ", ";
864 } 1117 }
865 if (has_array) { 1118 if (has_array) {
866 expr += ", float(ftoi(" + Visit(meta->array) + "))"; 1119 expr += ", float(" + Visit(meta->array).AsInt() + ')';
867 } 1120 }
868 if (has_shadow) { 1121 if (has_shadow) {
869 expr += ", " + Visit(meta->depth_compare); 1122 expr += ", " + Visit(meta->depth_compare).AsFloat();
870 } 1123 }
871 expr += ')'; 1124 expr += ')';
872 1125
@@ -897,11 +1150,11 @@ private:
897 // required to be constant) 1150 // required to be constant)
898 expr += std::to_string(static_cast<s32>(immediate->GetValue())); 1151 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
899 } else { 1152 } else {
900 expr += fmt::format("ftoi({})", Visit(operand)); 1153 expr += Visit(operand).AsInt();
901 } 1154 }
902 break; 1155 break;
903 case Type::Float: 1156 case Type::Float:
904 expr += Visit(operand); 1157 expr += Visit(operand).AsFloat();
905 break; 1158 break;
906 default: { 1159 default: {
907 const auto type_int = static_cast<u32>(type); 1160 const auto type_int = static_cast<u32>(type);
@@ -917,7 +1170,7 @@ private:
917 if (aoffi.empty()) { 1170 if (aoffi.empty()) {
918 return {}; 1171 return {};
919 } 1172 }
920 constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"}; 1173 constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
921 std::string expr = ", "; 1174 std::string expr = ", ";
922 expr += coord_constructors.at(aoffi.size() - 1); 1175 expr += coord_constructors.at(aoffi.size() - 1);
923 expr += '('; 1176 expr += '(';
@@ -930,7 +1183,7 @@ private:
930 expr += std::to_string(static_cast<s32>(immediate->GetValue())); 1183 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
931 } else if (device.HasVariableAoffi()) { 1184 } else if (device.HasVariableAoffi()) {
932 // Avoid using variable AOFFI on unsupported devices. 1185 // Avoid using variable AOFFI on unsupported devices.
933 expr += fmt::format("ftoi({})", Visit(operand)); 1186 expr += Visit(operand).AsInt();
934 } else { 1187 } else {
935 // Insert 0 on devices not supporting variable AOFFI. 1188 // Insert 0 on devices not supporting variable AOFFI.
936 expr += '0'; 1189 expr += '0';
@@ -944,318 +1197,382 @@ private:
944 return expr; 1197 return expr;
945 } 1198 }
946 1199
947 std::string Assign(Operation operation) { 1200 std::string BuildIntegerCoordinates(Operation operation) {
1201 constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
1202 const std::size_t coords_count{operation.GetOperandsCount()};
1203 std::string expr = constructors.at(coords_count - 1);
1204 for (std::size_t i = 0; i < coords_count; ++i) {
1205 expr += VisitOperand(operation, i).AsInt();
1206 if (i + 1 < coords_count) {
1207 expr += ", ";
1208 }
1209 }
1210 expr += ')';
1211 return expr;
1212 }
1213
1214 std::string BuildImageValues(Operation operation) {
1215 const auto meta{std::get<MetaImage>(operation.GetMeta())};
1216 const auto [constructors, type] = [&]() -> std::pair<std::array<const char*, 4>, Type> {
1217 constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"};
1218 if (!meta.image.IsSizeKnown()) {
1219 return {float_constructors, Type::Float};
1220 }
1221 switch (meta.image.GetSize()) {
1222 case Tegra::Shader::ImageAtomicSize::U32:
1223 return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint};
1224 case Tegra::Shader::ImageAtomicSize::S32:
1225 return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Uint};
1226 default:
1227 UNIMPLEMENTED_MSG("Unimplemented image size={}",
1228 static_cast<u32>(meta.image.GetSize()));
1229 return {float_constructors, Type::Float};
1230 }
1231 }();
1232
1233 const std::size_t values_count{meta.values.size()};
1234 std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
1235 for (std::size_t i = 0; i < values_count; ++i) {
1236 expr += Visit(meta.values.at(i)).As(type);
1237 if (i + 1 < values_count) {
1238 expr += ", ";
1239 }
1240 }
1241 expr += ')';
1242 return expr;
1243 }
1244
1245 Expression AtomicImage(Operation operation, const char* opname) {
1246 constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
1247 const auto meta{std::get<MetaImage>(operation.GetMeta())};
1248 ASSERT(meta.values.size() == 1);
1249 ASSERT(meta.image.IsSizeKnown());
1250
1251 const auto type = [&]() {
1252 switch (const auto size = meta.image.GetSize()) {
1253 case Tegra::Shader::ImageAtomicSize::U32:
1254 return Type::Uint;
1255 case Tegra::Shader::ImageAtomicSize::S32:
1256 return Type::Int;
1257 default:
1258 UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast<u32>(size));
1259 return Type::Uint;
1260 }
1261 }();
1262
1263 return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image),
1264 BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)),
1265 type};
1266 }
1267
1268 Expression Assign(Operation operation) {
948 const Node& dest = operation[0]; 1269 const Node& dest = operation[0];
949 const Node& src = operation[1]; 1270 const Node& src = operation[1];
950 1271
951 std::string target; 1272 Expression target;
952 if (const auto gpr = std::get_if<GprNode>(&*dest)) { 1273 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
953 if (gpr->GetIndex() == Register::ZeroIndex) { 1274 if (gpr->GetIndex() == Register::ZeroIndex) {
954 // Writing to Register::ZeroIndex is a no op 1275 // Writing to Register::ZeroIndex is a no op
955 return {}; 1276 return {};
956 } 1277 }
957 target = GetRegister(gpr->GetIndex()); 1278 target = {GetRegister(gpr->GetIndex()), Type::Float};
958 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { 1279 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
959 UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); 1280 UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
960 1281 target = GetOutputAttribute(abuf);
961 target = [&]() -> std::string {
962 switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
963 case Attribute::Index::Position:
964 return "gl_Position"s + GetSwizzle(abuf->GetElement());
965 case Attribute::Index::PointSize:
966 return "gl_PointSize";
967 case Attribute::Index::ClipDistances0123:
968 return fmt::format("gl_ClipDistance[{}]", abuf->GetElement());
969 case Attribute::Index::ClipDistances4567:
970 return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4);
971 default:
972 if (IsGenericAttribute(attribute)) {
973 return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
974 }
975 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
976 static_cast<u32>(attribute));
977 return "0";
978 }
979 }();
980 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { 1282 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
981 target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); 1283 if (stage == ProgramType::Compute) {
1284 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
1285 }
1286 target = {
1287 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
1288 Type::Uint};
982 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1289 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
983 const std::string real = Visit(gmem->GetRealAddress()); 1290 const std::string real = Visit(gmem->GetRealAddress()).AsUint();
984 const std::string base = Visit(gmem->GetBaseAddress()); 1291 const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
985 const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); 1292 const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
986 target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); 1293 target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
1294 Type::Uint};
987 } else { 1295 } else {
988 UNREACHABLE_MSG("Assign called without a proper target"); 1296 UNREACHABLE_MSG("Assign called without a proper target");
989 } 1297 }
990 1298
991 code.AddLine("{} = {};", target, Visit(src)); 1299 code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
992 return {}; 1300 return {};
993 } 1301 }
994 1302
995 template <Type type> 1303 template <Type type>
996 std::string Add(Operation operation) { 1304 Expression Add(Operation operation) {
997 return GenerateBinaryInfix(operation, "+", type, type, type); 1305 return GenerateBinaryInfix(operation, "+", type, type, type);
998 } 1306 }
999 1307
1000 template <Type type> 1308 template <Type type>
1001 std::string Mul(Operation operation) { 1309 Expression Mul(Operation operation) {
1002 return GenerateBinaryInfix(operation, "*", type, type, type); 1310 return GenerateBinaryInfix(operation, "*", type, type, type);
1003 } 1311 }
1004 1312
1005 template <Type type> 1313 template <Type type>
1006 std::string Div(Operation operation) { 1314 Expression Div(Operation operation) {
1007 return GenerateBinaryInfix(operation, "/", type, type, type); 1315 return GenerateBinaryInfix(operation, "/", type, type, type);
1008 } 1316 }
1009 1317
1010 template <Type type> 1318 template <Type type>
1011 std::string Fma(Operation operation) { 1319 Expression Fma(Operation operation) {
1012 return GenerateTernary(operation, "fma", type, type, type, type); 1320 return GenerateTernary(operation, "fma", type, type, type, type);
1013 } 1321 }
1014 1322
1015 template <Type type> 1323 template <Type type>
1016 std::string Negate(Operation operation) { 1324 Expression Negate(Operation operation) {
1017 return GenerateUnary(operation, "-", type, type, true); 1325 return GenerateUnary(operation, "-", type, type);
1018 } 1326 }
1019 1327
1020 template <Type type> 1328 template <Type type>
1021 std::string Absolute(Operation operation) { 1329 Expression Absolute(Operation operation) {
1022 return GenerateUnary(operation, "abs", type, type, false); 1330 return GenerateUnary(operation, "abs", type, type);
1023 } 1331 }
1024 1332
1025 std::string FClamp(Operation operation) { 1333 Expression FClamp(Operation operation) {
1026 return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, 1334 return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
1027 Type::Float); 1335 Type::Float);
1028 } 1336 }
1029 1337
1338 Expression FCastHalf0(Operation operation) {
1339 return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1340 }
1341
1342 Expression FCastHalf1(Operation operation) {
1343 return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1344 }
1345
1030 template <Type type> 1346 template <Type type>
1031 std::string Min(Operation operation) { 1347 Expression Min(Operation operation) {
1032 return GenerateBinaryCall(operation, "min", type, type, type); 1348 return GenerateBinaryCall(operation, "min", type, type, type);
1033 } 1349 }
1034 1350
1035 template <Type type> 1351 template <Type type>
1036 std::string Max(Operation operation) { 1352 Expression Max(Operation operation) {
1037 return GenerateBinaryCall(operation, "max", type, type, type); 1353 return GenerateBinaryCall(operation, "max", type, type, type);
1038 } 1354 }
1039 1355
1040 std::string Select(Operation operation) { 1356 Expression Select(Operation operation) {
1041 const std::string condition = Visit(operation[0]); 1357 const std::string condition = Visit(operation[0]).AsBool();
1042 const std::string true_case = Visit(operation[1]); 1358 const std::string true_case = Visit(operation[1]).AsUint();
1043 const std::string false_case = Visit(operation[2]); 1359 const std::string false_case = Visit(operation[2]).AsUint();
1044 const std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case); 1360 std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
1045 1361
1046 return ApplyPrecise(operation, op_str); 1362 return ApplyPrecise(operation, std::move(op_str), Type::Uint);
1047 } 1363 }
1048 1364
1049 std::string FCos(Operation operation) { 1365 Expression FCos(Operation operation) {
1050 return GenerateUnary(operation, "cos", Type::Float, Type::Float, false); 1366 return GenerateUnary(operation, "cos", Type::Float, Type::Float);
1051 } 1367 }
1052 1368
1053 std::string FSin(Operation operation) { 1369 Expression FSin(Operation operation) {
1054 return GenerateUnary(operation, "sin", Type::Float, Type::Float, false); 1370 return GenerateUnary(operation, "sin", Type::Float, Type::Float);
1055 } 1371 }
1056 1372
1057 std::string FExp2(Operation operation) { 1373 Expression FExp2(Operation operation) {
1058 return GenerateUnary(operation, "exp2", Type::Float, Type::Float, false); 1374 return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
1059 } 1375 }
1060 1376
1061 std::string FLog2(Operation operation) { 1377 Expression FLog2(Operation operation) {
1062 return GenerateUnary(operation, "log2", Type::Float, Type::Float, false); 1378 return GenerateUnary(operation, "log2", Type::Float, Type::Float);
1063 } 1379 }
1064 1380
1065 std::string FInverseSqrt(Operation operation) { 1381 Expression FInverseSqrt(Operation operation) {
1066 return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float, false); 1382 return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
1067 } 1383 }
1068 1384
1069 std::string FSqrt(Operation operation) { 1385 Expression FSqrt(Operation operation) {
1070 return GenerateUnary(operation, "sqrt", Type::Float, Type::Float, false); 1386 return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
1071 } 1387 }
1072 1388
1073 std::string FRoundEven(Operation operation) { 1389 Expression FRoundEven(Operation operation) {
1074 return GenerateUnary(operation, "roundEven", Type::Float, Type::Float, false); 1390 return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
1075 } 1391 }
1076 1392
1077 std::string FFloor(Operation operation) { 1393 Expression FFloor(Operation operation) {
1078 return GenerateUnary(operation, "floor", Type::Float, Type::Float, false); 1394 return GenerateUnary(operation, "floor", Type::Float, Type::Float);
1079 } 1395 }
1080 1396
1081 std::string FCeil(Operation operation) { 1397 Expression FCeil(Operation operation) {
1082 return GenerateUnary(operation, "ceil", Type::Float, Type::Float, false); 1398 return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
1083 } 1399 }
1084 1400
1085 std::string FTrunc(Operation operation) { 1401 Expression FTrunc(Operation operation) {
1086 return GenerateUnary(operation, "trunc", Type::Float, Type::Float, false); 1402 return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
1087 } 1403 }
1088 1404
1089 template <Type type> 1405 template <Type type>
1090 std::string FCastInteger(Operation operation) { 1406 Expression FCastInteger(Operation operation) {
1091 return GenerateUnary(operation, "float", Type::Float, type, false); 1407 return GenerateUnary(operation, "float", Type::Float, type);
1092 } 1408 }
1093 1409
1094 std::string ICastFloat(Operation operation) { 1410 Expression ICastFloat(Operation operation) {
1095 return GenerateUnary(operation, "int", Type::Int, Type::Float, false); 1411 return GenerateUnary(operation, "int", Type::Int, Type::Float);
1096 } 1412 }
1097 1413
1098 std::string ICastUnsigned(Operation operation) { 1414 Expression ICastUnsigned(Operation operation) {
1099 return GenerateUnary(operation, "int", Type::Int, Type::Uint, false); 1415 return GenerateUnary(operation, "int", Type::Int, Type::Uint);
1100 } 1416 }
1101 1417
1102 template <Type type> 1418 template <Type type>
1103 std::string LogicalShiftLeft(Operation operation) { 1419 Expression LogicalShiftLeft(Operation operation) {
1104 return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); 1420 return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
1105 } 1421 }
1106 1422
1107 std::string ILogicalShiftRight(Operation operation) { 1423 Expression ILogicalShiftRight(Operation operation) {
1108 const std::string op_a = VisitOperand(operation, 0, Type::Uint); 1424 const std::string op_a = VisitOperand(operation, 0).AsUint();
1109 const std::string op_b = VisitOperand(operation, 1, Type::Uint); 1425 const std::string op_b = VisitOperand(operation, 1).AsUint();
1110 const std::string op_str = fmt::format("int({} >> {})", op_a, op_b); 1426 std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
1111 1427
1112 return ApplyPrecise(operation, BitwiseCastResult(op_str, Type::Int)); 1428 return ApplyPrecise(operation, std::move(op_str), Type::Int);
1113 } 1429 }
1114 1430
1115 std::string IArithmeticShiftRight(Operation operation) { 1431 Expression IArithmeticShiftRight(Operation operation) {
1116 return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); 1432 return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
1117 } 1433 }
1118 1434
1119 template <Type type> 1435 template <Type type>
1120 std::string BitwiseAnd(Operation operation) { 1436 Expression BitwiseAnd(Operation operation) {
1121 return GenerateBinaryInfix(operation, "&", type, type, type); 1437 return GenerateBinaryInfix(operation, "&", type, type, type);
1122 } 1438 }
1123 1439
1124 template <Type type> 1440 template <Type type>
1125 std::string BitwiseOr(Operation operation) { 1441 Expression BitwiseOr(Operation operation) {
1126 return GenerateBinaryInfix(operation, "|", type, type, type); 1442 return GenerateBinaryInfix(operation, "|", type, type, type);
1127 } 1443 }
1128 1444
1129 template <Type type> 1445 template <Type type>
1130 std::string BitwiseXor(Operation operation) { 1446 Expression BitwiseXor(Operation operation) {
1131 return GenerateBinaryInfix(operation, "^", type, type, type); 1447 return GenerateBinaryInfix(operation, "^", type, type, type);
1132 } 1448 }
1133 1449
1134 template <Type type> 1450 template <Type type>
1135 std::string BitwiseNot(Operation operation) { 1451 Expression BitwiseNot(Operation operation) {
1136 return GenerateUnary(operation, "~", type, type, false); 1452 return GenerateUnary(operation, "~", type, type);
1137 } 1453 }
1138 1454
1139 std::string UCastFloat(Operation operation) { 1455 Expression UCastFloat(Operation operation) {
1140 return GenerateUnary(operation, "uint", Type::Uint, Type::Float, false); 1456 return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
1141 } 1457 }
1142 1458
1143 std::string UCastSigned(Operation operation) { 1459 Expression UCastSigned(Operation operation) {
1144 return GenerateUnary(operation, "uint", Type::Uint, Type::Int, false); 1460 return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
1145 } 1461 }
1146 1462
1147 std::string UShiftRight(Operation operation) { 1463 Expression UShiftRight(Operation operation) {
1148 return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); 1464 return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
1149 } 1465 }
1150 1466
1151 template <Type type> 1467 template <Type type>
1152 std::string BitfieldInsert(Operation operation) { 1468 Expression BitfieldInsert(Operation operation) {
1153 return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, 1469 return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
1154 Type::Int); 1470 Type::Int);
1155 } 1471 }
1156 1472
1157 template <Type type> 1473 template <Type type>
1158 std::string BitfieldExtract(Operation operation) { 1474 Expression BitfieldExtract(Operation operation) {
1159 return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); 1475 return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
1160 } 1476 }
1161 1477
1162 template <Type type> 1478 template <Type type>
1163 std::string BitCount(Operation operation) { 1479 Expression BitCount(Operation operation) {
1164 return GenerateUnary(operation, "bitCount", type, type, false); 1480 return GenerateUnary(operation, "bitCount", type, type);
1165 } 1481 }
1166 1482
1167 std::string HNegate(Operation operation) { 1483 Expression HNegate(Operation operation) {
1168 const auto GetNegate = [&](std::size_t index) { 1484 const auto GetNegate = [&](std::size_t index) {
1169 return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1"; 1485 return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
1170 }; 1486 };
1171 const std::string value = 1487 return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
1172 fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0, Type::HalfFloat), 1488 GetNegate(1), GetNegate(2)),
1173 GetNegate(1), GetNegate(2)); 1489 Type::HalfFloat};
1174 return BitwiseCastResult(value, Type::HalfFloat); 1490 }
1175 } 1491
1176 1492 Expression HClamp(Operation operation) {
1177 std::string HClamp(Operation operation) { 1493 const std::string value = VisitOperand(operation, 0).AsHalfFloat();
1178 const std::string value = VisitOperand(operation, 0, Type::HalfFloat); 1494 const std::string min = VisitOperand(operation, 1).AsFloat();
1179 const std::string min = VisitOperand(operation, 1, Type::Float); 1495 const std::string max = VisitOperand(operation, 2).AsFloat();
1180 const std::string max = VisitOperand(operation, 2, Type::Float); 1496 std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
1181 const std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); 1497
1182 1498 return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
1183 return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); 1499 }
1184 } 1500
1185 1501 Expression HCastFloat(Operation operation) {
1186 std::string HUnpack(Operation operation) { 1502 return {fmt::format("vec2({})", VisitOperand(operation, 0).AsFloat()), Type::HalfFloat};
1187 const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)};
1188 const auto value = [&]() -> std::string {
1189 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1190 case Tegra::Shader::HalfType::H0_H1:
1191 return operand;
1192 case Tegra::Shader::HalfType::F32:
1193 return fmt::format("vec2(fromHalf2({}))", operand);
1194 case Tegra::Shader::HalfType::H0_H0:
1195 return fmt::format("vec2({}[0])", operand);
1196 case Tegra::Shader::HalfType::H1_H1:
1197 return fmt::format("vec2({}[1])", operand);
1198 }
1199 UNREACHABLE();
1200 return "0";
1201 }();
1202 return fmt::format("fromHalf2({})", value);
1203 } 1503 }
1204 1504
1205 std::string HMergeF32(Operation operation) { 1505 Expression HUnpack(Operation operation) {
1206 return fmt::format("float(toHalf2({})[0])", Visit(operation[0])); 1506 Expression operand = VisitOperand(operation, 0);
1507 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1508 case Tegra::Shader::HalfType::H0_H1:
1509 return operand;
1510 case Tegra::Shader::HalfType::F32:
1511 return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
1512 case Tegra::Shader::HalfType::H0_H0:
1513 return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
1514 case Tegra::Shader::HalfType::H1_H1:
1515 return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
1516 }
1517 }
1518
1519 Expression HMergeF32(Operation operation) {
1520 return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1207 } 1521 }
1208 1522
1209 std::string HMergeH0(Operation operation) { 1523 Expression HMergeH0(Operation operation) {
1210 return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[1]), 1524 std::string dest = VisitOperand(operation, 0).AsUint();
1211 Visit(operation[0])); 1525 std::string src = VisitOperand(operation, 1).AsUint();
1526 return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint};
1212 } 1527 }
1213 1528
1214 std::string HMergeH1(Operation operation) { 1529 Expression HMergeH1(Operation operation) {
1215 return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[0]), 1530 std::string dest = VisitOperand(operation, 0).AsUint();
1216 Visit(operation[1])); 1531 std::string src = VisitOperand(operation, 1).AsUint();
1532 return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint};
1217 } 1533 }
1218 1534
1219 std::string HPack2(Operation operation) { 1535 Expression HPack2(Operation operation) {
1220 return fmt::format("utof(packHalf2x16(vec2({}, {})))", Visit(operation[0]), 1536 return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
1221 Visit(operation[1])); 1537 VisitOperand(operation, 1).AsFloat()),
1538 Type::HalfFloat};
1222 } 1539 }
1223 1540
1224 template <Type type> 1541 template <Type type>
1225 std::string LogicalLessThan(Operation operation) { 1542 Expression LogicalLessThan(Operation operation) {
1226 return GenerateBinaryInfix(operation, "<", Type::Bool, type, type); 1543 return GenerateBinaryInfix(operation, "<", Type::Bool, type, type);
1227 } 1544 }
1228 1545
1229 template <Type type> 1546 template <Type type>
1230 std::string LogicalEqual(Operation operation) { 1547 Expression LogicalEqual(Operation operation) {
1231 return GenerateBinaryInfix(operation, "==", Type::Bool, type, type); 1548 return GenerateBinaryInfix(operation, "==", Type::Bool, type, type);
1232 } 1549 }
1233 1550
1234 template <Type type> 1551 template <Type type>
1235 std::string LogicalLessEqual(Operation operation) { 1552 Expression LogicalLessEqual(Operation operation) {
1236 return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type); 1553 return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type);
1237 } 1554 }
1238 1555
1239 template <Type type> 1556 template <Type type>
1240 std::string LogicalGreaterThan(Operation operation) { 1557 Expression LogicalGreaterThan(Operation operation) {
1241 return GenerateBinaryInfix(operation, ">", Type::Bool, type, type); 1558 return GenerateBinaryInfix(operation, ">", Type::Bool, type, type);
1242 } 1559 }
1243 1560
1244 template <Type type> 1561 template <Type type>
1245 std::string LogicalNotEqual(Operation operation) { 1562 Expression LogicalNotEqual(Operation operation) {
1246 return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type); 1563 return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type);
1247 } 1564 }
1248 1565
1249 template <Type type> 1566 template <Type type>
1250 std::string LogicalGreaterEqual(Operation operation) { 1567 Expression LogicalGreaterEqual(Operation operation) {
1251 return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type); 1568 return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type);
1252 } 1569 }
1253 1570
1254 std::string LogicalFIsNan(Operation operation) { 1571 Expression LogicalFIsNan(Operation operation) {
1255 return GenerateUnary(operation, "isnan", Type::Bool, Type::Float, false); 1572 return GenerateUnary(operation, "isnan", Type::Bool, Type::Float);
1256 } 1573 }
1257 1574
1258 std::string LogicalAssign(Operation operation) { 1575 Expression LogicalAssign(Operation operation) {
1259 const Node& dest = operation[0]; 1576 const Node& dest = operation[0];
1260 const Node& src = operation[1]; 1577 const Node& src = operation[1];
1261 1578
@@ -1276,82 +1593,80 @@ private:
1276 target = GetInternalFlag(flag->GetFlag()); 1593 target = GetInternalFlag(flag->GetFlag());
1277 } 1594 }
1278 1595
1279 code.AddLine("{} = {};", target, Visit(src)); 1596 code.AddLine("{} = {};", target, Visit(src).AsBool());
1280 return {}; 1597 return {};
1281 } 1598 }
1282 1599
1283 std::string LogicalAnd(Operation operation) { 1600 Expression LogicalAnd(Operation operation) {
1284 return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); 1601 return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
1285 } 1602 }
1286 1603
1287 std::string LogicalOr(Operation operation) { 1604 Expression LogicalOr(Operation operation) {
1288 return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); 1605 return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
1289 } 1606 }
1290 1607
1291 std::string LogicalXor(Operation operation) { 1608 Expression LogicalXor(Operation operation) {
1292 return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); 1609 return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
1293 } 1610 }
1294 1611
1295 std::string LogicalNegate(Operation operation) { 1612 Expression LogicalNegate(Operation operation) {
1296 return GenerateUnary(operation, "!", Type::Bool, Type::Bool, false); 1613 return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
1297 } 1614 }
1298 1615
1299 std::string LogicalPick2(Operation operation) { 1616 Expression LogicalPick2(Operation operation) {
1300 const std::string pair = VisitOperand(operation, 0, Type::Bool2); 1617 return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
1301 return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); 1618 VisitOperand(operation, 1).AsUint()),
1619 Type::Bool};
1302 } 1620 }
1303 1621
1304 std::string LogicalAll2(Operation operation) { 1622 Expression LogicalAnd2(Operation operation) {
1305 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); 1623 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
1306 } 1624 }
1307 1625
1308 std::string LogicalAny2(Operation operation) {
1309 return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
1310 }
1311
1312 template <bool with_nan> 1626 template <bool with_nan>
1313 std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { 1627 Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
1314 const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, 1628 Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
1315 Type::HalfFloat, Type::HalfFloat)}; 1629 Type::HalfFloat, Type::HalfFloat);
1316 if constexpr (!with_nan) { 1630 if constexpr (!with_nan) {
1317 return comparison; 1631 return comparison;
1318 } 1632 }
1319 return fmt::format("halfFloatNanComparison({}, {}, {})", comparison, 1633 return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
1320 VisitOperand(operation, 0, Type::HalfFloat), 1634 VisitOperand(operation, 0).AsHalfFloat(),
1321 VisitOperand(operation, 1, Type::HalfFloat)); 1635 VisitOperand(operation, 1).AsHalfFloat()),
1636 Type::Bool2};
1322 } 1637 }
1323 1638
1324 template <bool with_nan> 1639 template <bool with_nan>
1325 std::string Logical2HLessThan(Operation operation) { 1640 Expression Logical2HLessThan(Operation operation) {
1326 return GenerateHalfComparison<with_nan>(operation, "lessThan"); 1641 return GenerateHalfComparison<with_nan>(operation, "lessThan");
1327 } 1642 }
1328 1643
1329 template <bool with_nan> 1644 template <bool with_nan>
1330 std::string Logical2HEqual(Operation operation) { 1645 Expression Logical2HEqual(Operation operation) {
1331 return GenerateHalfComparison<with_nan>(operation, "equal"); 1646 return GenerateHalfComparison<with_nan>(operation, "equal");
1332 } 1647 }
1333 1648
1334 template <bool with_nan> 1649 template <bool with_nan>
1335 std::string Logical2HLessEqual(Operation operation) { 1650 Expression Logical2HLessEqual(Operation operation) {
1336 return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); 1651 return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
1337 } 1652 }
1338 1653
1339 template <bool with_nan> 1654 template <bool with_nan>
1340 std::string Logical2HGreaterThan(Operation operation) { 1655 Expression Logical2HGreaterThan(Operation operation) {
1341 return GenerateHalfComparison<with_nan>(operation, "greaterThan"); 1656 return GenerateHalfComparison<with_nan>(operation, "greaterThan");
1342 } 1657 }
1343 1658
1344 template <bool with_nan> 1659 template <bool with_nan>
1345 std::string Logical2HNotEqual(Operation operation) { 1660 Expression Logical2HNotEqual(Operation operation) {
1346 return GenerateHalfComparison<with_nan>(operation, "notEqual"); 1661 return GenerateHalfComparison<with_nan>(operation, "notEqual");
1347 } 1662 }
1348 1663
1349 template <bool with_nan> 1664 template <bool with_nan>
1350 std::string Logical2HGreaterEqual(Operation operation) { 1665 Expression Logical2HGreaterEqual(Operation operation) {
1351 return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); 1666 return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
1352 } 1667 }
1353 1668
1354 std::string Texture(Operation operation) { 1669 Expression Texture(Operation operation) {
1355 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1670 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1356 ASSERT(meta); 1671 ASSERT(meta);
1357 1672
@@ -1360,10 +1675,10 @@ private:
1360 if (meta->sampler.IsShadow()) { 1675 if (meta->sampler.IsShadow()) {
1361 expr = "vec4(" + expr + ')'; 1676 expr = "vec4(" + expr + ')';
1362 } 1677 }
1363 return expr + GetSwizzle(meta->element); 1678 return {expr + GetSwizzle(meta->element), Type::Float};
1364 } 1679 }
1365 1680
1366 std::string TextureLod(Operation operation) { 1681 Expression TextureLod(Operation operation) {
1367 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1682 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1368 ASSERT(meta); 1683 ASSERT(meta);
1369 1684
@@ -1372,54 +1687,54 @@ private:
1372 if (meta->sampler.IsShadow()) { 1687 if (meta->sampler.IsShadow()) {
1373 expr = "vec4(" + expr + ')'; 1688 expr = "vec4(" + expr + ')';
1374 } 1689 }
1375 return expr + GetSwizzle(meta->element); 1690 return {expr + GetSwizzle(meta->element), Type::Float};
1376 } 1691 }
1377 1692
1378 std::string TextureGather(Operation operation) { 1693 Expression TextureGather(Operation operation) {
1379 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1694 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1380 ASSERT(meta); 1695 ASSERT(meta);
1381 1696
1382 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; 1697 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
1383 return GenerateTexture(operation, "Gather", 1698 return {GenerateTexture(operation, "Gather",
1384 {TextureArgument{type, meta->component}, TextureAoffi{}}) + 1699 {TextureArgument{type, meta->component}, TextureAoffi{}}) +
1385 GetSwizzle(meta->element); 1700 GetSwizzle(meta->element),
1701 Type::Float};
1386 } 1702 }
1387 1703
1388 std::string TextureQueryDimensions(Operation operation) { 1704 Expression TextureQueryDimensions(Operation operation) {
1389 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1705 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1390 ASSERT(meta); 1706 ASSERT(meta);
1391 1707
1392 const std::string sampler = GetSampler(meta->sampler); 1708 const std::string sampler = GetSampler(meta->sampler);
1393 const std::string lod = VisitOperand(operation, 0, Type::Int); 1709 const std::string lod = VisitOperand(operation, 0).AsInt();
1394 1710
1395 switch (meta->element) { 1711 switch (meta->element) {
1396 case 0: 1712 case 0:
1397 case 1: 1713 case 1:
1398 return fmt::format("itof(int(textureSize({}, {}){}))", sampler, lod, 1714 return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
1399 GetSwizzle(meta->element)); 1715 Type::Int};
1400 case 2:
1401 return "0";
1402 case 3: 1716 case 3:
1403 return fmt::format("itof(textureQueryLevels({}))", sampler); 1717 return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
1404 } 1718 }
1405 UNREACHABLE(); 1719 UNREACHABLE();
1406 return "0"; 1720 return {"0", Type::Int};
1407 } 1721 }
1408 1722
1409 std::string TextureQueryLod(Operation operation) { 1723 Expression TextureQueryLod(Operation operation) {
1410 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1724 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1411 ASSERT(meta); 1725 ASSERT(meta);
1412 1726
1413 if (meta->element < 2) { 1727 if (meta->element < 2) {
1414 return fmt::format("itof(int(({} * vec2(256)){}))", 1728 return {fmt::format("int(({} * vec2(256)){})",
1415 GenerateTexture(operation, "QueryLod", {}), 1729 GenerateTexture(operation, "QueryLod", {}),
1416 GetSwizzle(meta->element)); 1730 GetSwizzle(meta->element)),
1731 Type::Int};
1417 } 1732 }
1418 return "0"; 1733 return {"0", Type::Int};
1419 } 1734 }
1420 1735
1421 std::string TexelFetch(Operation operation) { 1736 Expression TexelFetch(Operation operation) {
1422 constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; 1737 constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
1423 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1738 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1424 ASSERT(meta); 1739 ASSERT(meta);
1425 UNIMPLEMENTED_IF(meta->sampler.IsArray()); 1740 UNIMPLEMENTED_IF(meta->sampler.IsArray());
@@ -1432,60 +1747,117 @@ private:
1432 expr += constructors.at(operation.GetOperandsCount() - 1); 1747 expr += constructors.at(operation.GetOperandsCount() - 1);
1433 expr += '('; 1748 expr += '(';
1434 for (std::size_t i = 0; i < count; ++i) { 1749 for (std::size_t i = 0; i < count; ++i) {
1435 expr += VisitOperand(operation, i, Type::Int); 1750 expr += VisitOperand(operation, i).AsInt();
1436 const std::size_t next = i + 1; 1751 const std::size_t next = i + 1;
1437 if (next == count) 1752 if (next == count)
1438 expr += ')'; 1753 expr += ')';
1439 else if (next < count) 1754 else if (next < count)
1440 expr += ", "; 1755 expr += ", ";
1441 } 1756 }
1757
1758 // Store a copy of the expression without the lod to be used with texture buffers
1759 std::string expr_buffer = expr;
1760
1442 if (meta->lod) { 1761 if (meta->lod) {
1443 expr += ", "; 1762 expr += ", ";
1444 expr += CastOperand(Visit(meta->lod), Type::Int); 1763 expr += Visit(meta->lod).AsInt();
1445 } 1764 }
1446 expr += ')'; 1765 expr += ')';
1766 expr += GetSwizzle(meta->element);
1767
1768 expr_buffer += ')';
1769 expr_buffer += GetSwizzle(meta->element);
1447 1770
1448 return expr + GetSwizzle(meta->element); 1771 const std::string tmp{code.GenerateTemporary()};
1772 EmitIfdefIsBuffer(meta->sampler);
1773 code.AddLine("float {} = {};", tmp, expr_buffer);
1774 code.AddLine("#else");
1775 code.AddLine("float {} = {};", tmp, expr);
1776 code.AddLine("#endif");
1777
1778 return {tmp, Type::Float};
1779 }
1780
1781 Expression ImageStore(Operation operation) {
1782 const auto meta{std::get<MetaImage>(operation.GetMeta())};
1783 code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
1784 BuildIntegerCoordinates(operation), BuildImageValues(operation));
1785 return {};
1786 }
1787
1788 Expression AtomicImageAdd(Operation operation) {
1789 return AtomicImage(operation, "imageAtomicAdd");
1790 }
1791
1792 Expression AtomicImageMin(Operation operation) {
1793 return AtomicImage(operation, "imageAtomicMin");
1794 }
1795
1796 Expression AtomicImageMax(Operation operation) {
1797 return AtomicImage(operation, "imageAtomicMax");
1798 }
1799 Expression AtomicImageAnd(Operation operation) {
1800 return AtomicImage(operation, "imageAtomicAnd");
1449 } 1801 }
1450 1802
1451 std::string Branch(Operation operation) { 1803 Expression AtomicImageOr(Operation operation) {
1804 return AtomicImage(operation, "imageAtomicOr");
1805 }
1806
1807 Expression AtomicImageXor(Operation operation) {
1808 return AtomicImage(operation, "imageAtomicXor");
1809 }
1810
1811 Expression AtomicImageExchange(Operation operation) {
1812 return AtomicImage(operation, "imageAtomicExchange");
1813 }
1814
1815 Expression Branch(Operation operation) {
1452 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 1816 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
1453 UNIMPLEMENTED_IF(!target); 1817 UNIMPLEMENTED_IF(!target);
1454 1818
1455 code.AddLine("jmp_to = 0x{:x}u;", target->GetValue()); 1819 code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
1456 code.AddLine("break;"); 1820 code.AddLine("break;");
1457 return {}; 1821 return {};
1458 } 1822 }
1459 1823
1460 std::string PushFlowStack(Operation operation) { 1824 Expression BranchIndirect(Operation operation) {
1825 const std::string op_a = VisitOperand(operation, 0).AsUint();
1826
1827 code.AddLine("jmp_to = {};", op_a);
1828 code.AddLine("break;");
1829 return {};
1830 }
1831
1832 Expression PushFlowStack(Operation operation) {
1461 const auto stack = std::get<MetaStackClass>(operation.GetMeta()); 1833 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1462 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 1834 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
1463 UNIMPLEMENTED_IF(!target); 1835 UNIMPLEMENTED_IF(!target);
1464 1836
1465 code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack), 1837 code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
1466 target->GetValue()); 1838 target->GetValue());
1467 return {}; 1839 return {};
1468 } 1840 }
1469 1841
1470 std::string PopFlowStack(Operation operation) { 1842 Expression PopFlowStack(Operation operation) {
1471 const auto stack = std::get<MetaStackClass>(operation.GetMeta()); 1843 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1472 code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); 1844 code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
1473 code.AddLine("break;"); 1845 code.AddLine("break;");
1474 return {}; 1846 return {};
1475 } 1847 }
1476 1848
1477 std::string Exit(Operation operation) { 1849 Expression Exit(Operation operation) {
1478 if (stage != ShaderStage::Fragment) { 1850 if (stage != ProgramType::Fragment) {
1479 code.AddLine("return;"); 1851 code.AddLine("return;");
1480 return {}; 1852 return {};
1481 } 1853 }
1482 const auto& used_registers = ir.GetRegisters(); 1854 const auto& used_registers = ir.GetRegisters();
1483 const auto SafeGetRegister = [&](u32 reg) -> std::string { 1855 const auto SafeGetRegister = [&](u32 reg) -> Expression {
1484 // TODO(Rodrigo): Replace with contains once C++20 releases 1856 // TODO(Rodrigo): Replace with contains once C++20 releases
1485 if (used_registers.find(reg) != used_registers.end()) { 1857 if (used_registers.find(reg) != used_registers.end()) {
1486 return GetRegister(reg); 1858 return {GetRegister(reg), Type::Float};
1487 } 1859 }
1488 return "0.0f"; 1860 return {"0.0f", Type::Float};
1489 }; 1861 };
1490 1862
1491 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); 1863 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
@@ -1498,7 +1870,7 @@ private:
1498 for (u32 component = 0; component < 4; ++component) { 1870 for (u32 component = 0; component < 4; ++component) {
1499 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { 1871 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
1500 code.AddLine("FragColor{}[{}] = {};", render_target, component, 1872 code.AddLine("FragColor{}[{}] = {};", render_target, component,
1501 SafeGetRegister(current_reg)); 1873 SafeGetRegister(current_reg).AsFloat());
1502 ++current_reg; 1874 ++current_reg;
1503 } 1875 }
1504 } 1876 }
@@ -1507,14 +1879,14 @@ private:
1507 if (header.ps.omap.depth) { 1879 if (header.ps.omap.depth) {
1508 // The depth output is always 2 registers after the last color output, and current_reg 1880 // The depth output is always 2 registers after the last color output, and current_reg
1509 // already contains one past the last color register. 1881 // already contains one past the last color register.
1510 code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1)); 1882 code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
1511 } 1883 }
1512 1884
1513 code.AddLine("return;"); 1885 code.AddLine("return;");
1514 return {}; 1886 return {};
1515 } 1887 }
1516 1888
1517 std::string Discard(Operation operation) { 1889 Expression Discard(Operation operation) {
1518 // Enclose "discard" in a conditional, so that GLSL compilation does not complain 1890 // Enclose "discard" in a conditional, so that GLSL compilation does not complain
1519 // about unexecuted instructions that may follow this. 1891 // about unexecuted instructions that may follow this.
1520 code.AddLine("if (true) {{"); 1892 code.AddLine("if (true) {{");
@@ -1525,8 +1897,8 @@ private:
1525 return {}; 1897 return {};
1526 } 1898 }
1527 1899
1528 std::string EmitVertex(Operation operation) { 1900 Expression EmitVertex(Operation operation) {
1529 ASSERT_MSG(stage == ShaderStage::Geometry, 1901 ASSERT_MSG(stage == ProgramType::Geometry,
1530 "EmitVertex is expected to be used in a geometry shader."); 1902 "EmitVertex is expected to be used in a geometry shader.");
1531 1903
1532 // If a geometry shader is attached, it will always flip (it's the last stage before 1904 // If a geometry shader is attached, it will always flip (it's the last stage before
@@ -1536,30 +1908,72 @@ private:
1536 return {}; 1908 return {};
1537 } 1909 }
1538 1910
1539 std::string EndPrimitive(Operation operation) { 1911 Expression EndPrimitive(Operation operation) {
1540 ASSERT_MSG(stage == ShaderStage::Geometry, 1912 ASSERT_MSG(stage == ProgramType::Geometry,
1541 "EndPrimitive is expected to be used in a geometry shader."); 1913 "EndPrimitive is expected to be used in a geometry shader.");
1542 1914
1543 code.AddLine("EndPrimitive();"); 1915 code.AddLine("EndPrimitive();");
1544 return {}; 1916 return {};
1545 } 1917 }
1546 1918
1547 std::string YNegate(Operation operation) { 1919 Expression YNegate(Operation operation) {
1548 // Config pack's third value is Y_NEGATE's state. 1920 // Config pack's third value is Y_NEGATE's state.
1549 return "uintBitsToFloat(config_pack[2])"; 1921 return {"config_pack[2]", Type::Uint};
1550 } 1922 }
1551 1923
1552 template <u32 element> 1924 template <u32 element>
1553 std::string LocalInvocationId(Operation) { 1925 Expression LocalInvocationId(Operation) {
1554 return "utof(gl_LocalInvocationID"s + GetSwizzle(element) + ')'; 1926 return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
1555 } 1927 }
1556 1928
1557 template <u32 element> 1929 template <u32 element>
1558 std::string WorkGroupId(Operation) { 1930 Expression WorkGroupId(Operation) {
1559 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; 1931 return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
1932 }
1933
1934 Expression BallotThread(Operation operation) {
1935 const std::string value = VisitOperand(operation, 0).AsBool();
1936 if (!device.HasWarpIntrinsics()) {
1937 LOG_ERROR(Render_OpenGL,
1938 "Nvidia warp intrinsics are not available and its required by a shader");
1939 // Stub on non-Nvidia devices by simulating all threads voting the same as the active
1940 // one.
1941 return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
1942 }
1943 return {fmt::format("ballotThreadNV({})", value), Type::Uint};
1944 }
1945
1946 Expression Vote(Operation operation, const char* func) {
1947 const std::string value = VisitOperand(operation, 0).AsBool();
1948 if (!device.HasWarpIntrinsics()) {
1949 LOG_ERROR(Render_OpenGL,
1950 "Nvidia vote intrinsics are not available and its required by a shader");
1951 // Stub with a warp size of one.
1952 return {value, Type::Bool};
1953 }
1954 return {fmt::format("{}({})", func, value), Type::Bool};
1955 }
1956
1957 Expression VoteAll(Operation operation) {
1958 return Vote(operation, "allThreadsNV");
1560 } 1959 }
1561 1960
1562 static constexpr OperationDecompilersArray operation_decompilers = { 1961 Expression VoteAny(Operation operation) {
1962 return Vote(operation, "anyThreadNV");
1963 }
1964
1965 Expression VoteEqual(Operation operation) {
1966 if (!device.HasWarpIntrinsics()) {
1967 LOG_ERROR(Render_OpenGL,
1968 "Nvidia vote intrinsics are not available and its required by a shader");
1969 // We must return true here since a stub for a theoretical warp size of 1 will always
1970 // return an equal result for all its votes.
1971 return {"true", Type::Bool};
1972 }
1973 return Vote(operation, "allThreadsEqualNV");
1974 }
1975
1976 static constexpr std::array operation_decompilers = {
1563 &GLSLDecompiler::Assign, 1977 &GLSLDecompiler::Assign,
1564 1978
1565 &GLSLDecompiler::Select, 1979 &GLSLDecompiler::Select,
@@ -1571,6 +1985,8 @@ private:
1571 &GLSLDecompiler::Negate<Type::Float>, 1985 &GLSLDecompiler::Negate<Type::Float>,
1572 &GLSLDecompiler::Absolute<Type::Float>, 1986 &GLSLDecompiler::Absolute<Type::Float>,
1573 &GLSLDecompiler::FClamp, 1987 &GLSLDecompiler::FClamp,
1988 &GLSLDecompiler::FCastHalf0,
1989 &GLSLDecompiler::FCastHalf1,
1574 &GLSLDecompiler::Min<Type::Float>, 1990 &GLSLDecompiler::Min<Type::Float>,
1575 &GLSLDecompiler::Max<Type::Float>, 1991 &GLSLDecompiler::Max<Type::Float>,
1576 &GLSLDecompiler::FCos, 1992 &GLSLDecompiler::FCos,
@@ -1631,6 +2047,7 @@ private:
1631 &GLSLDecompiler::Absolute<Type::HalfFloat>, 2047 &GLSLDecompiler::Absolute<Type::HalfFloat>,
1632 &GLSLDecompiler::HNegate, 2048 &GLSLDecompiler::HNegate,
1633 &GLSLDecompiler::HClamp, 2049 &GLSLDecompiler::HClamp,
2050 &GLSLDecompiler::HCastFloat,
1634 &GLSLDecompiler::HUnpack, 2051 &GLSLDecompiler::HUnpack,
1635 &GLSLDecompiler::HMergeF32, 2052 &GLSLDecompiler::HMergeF32,
1636 &GLSLDecompiler::HMergeH0, 2053 &GLSLDecompiler::HMergeH0,
@@ -1643,8 +2060,7 @@ private:
1643 &GLSLDecompiler::LogicalXor, 2060 &GLSLDecompiler::LogicalXor,
1644 &GLSLDecompiler::LogicalNegate, 2061 &GLSLDecompiler::LogicalNegate,
1645 &GLSLDecompiler::LogicalPick2, 2062 &GLSLDecompiler::LogicalPick2,
1646 &GLSLDecompiler::LogicalAll2, 2063 &GLSLDecompiler::LogicalAnd2,
1647 &GLSLDecompiler::LogicalAny2,
1648 2064
1649 &GLSLDecompiler::LogicalLessThan<Type::Float>, 2065 &GLSLDecompiler::LogicalLessThan<Type::Float>,
1650 &GLSLDecompiler::LogicalEqual<Type::Float>, 2066 &GLSLDecompiler::LogicalEqual<Type::Float>,
@@ -1688,7 +2104,17 @@ private:
1688 &GLSLDecompiler::TextureQueryLod, 2104 &GLSLDecompiler::TextureQueryLod,
1689 &GLSLDecompiler::TexelFetch, 2105 &GLSLDecompiler::TexelFetch,
1690 2106
2107 &GLSLDecompiler::ImageStore,
2108 &GLSLDecompiler::AtomicImageAdd,
2109 &GLSLDecompiler::AtomicImageMin,
2110 &GLSLDecompiler::AtomicImageMax,
2111 &GLSLDecompiler::AtomicImageAnd,
2112 &GLSLDecompiler::AtomicImageOr,
2113 &GLSLDecompiler::AtomicImageXor,
2114 &GLSLDecompiler::AtomicImageExchange,
2115
1691 &GLSLDecompiler::Branch, 2116 &GLSLDecompiler::Branch,
2117 &GLSLDecompiler::BranchIndirect,
1692 &GLSLDecompiler::PushFlowStack, 2118 &GLSLDecompiler::PushFlowStack,
1693 &GLSLDecompiler::PopFlowStack, 2119 &GLSLDecompiler::PopFlowStack,
1694 &GLSLDecompiler::Exit, 2120 &GLSLDecompiler::Exit,
@@ -1704,7 +2130,13 @@ private:
1704 &GLSLDecompiler::WorkGroupId<0>, 2130 &GLSLDecompiler::WorkGroupId<0>,
1705 &GLSLDecompiler::WorkGroupId<1>, 2131 &GLSLDecompiler::WorkGroupId<1>,
1706 &GLSLDecompiler::WorkGroupId<2>, 2132 &GLSLDecompiler::WorkGroupId<2>,
2133
2134 &GLSLDecompiler::BallotThread,
2135 &GLSLDecompiler::VoteAll,
2136 &GLSLDecompiler::VoteAny,
2137 &GLSLDecompiler::VoteEqual,
1707 }; 2138 };
2139 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1708 2140
1709 std::string GetRegister(u32 index) const { 2141 std::string GetRegister(u32 index) const {
1710 return GetDeclarationWithSuffix(index, "gpr"); 2142 return GetDeclarationWithSuffix(index, "gpr");
@@ -1744,8 +2176,8 @@ private:
1744 } 2176 }
1745 2177
1746 std::string GetInternalFlag(InternalFlag flag) const { 2178 std::string GetInternalFlag(InternalFlag flag) const {
1747 constexpr std::array<const char*, 4> InternalFlagNames = {"zero_flag", "sign_flag", 2179 constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
1748 "carry_flag", "overflow_flag"}; 2180 "overflow_flag"};
1749 const auto index = static_cast<u32>(flag); 2181 const auto index = static_cast<u32>(flag);
1750 ASSERT(index < static_cast<u32>(InternalFlag::Amount)); 2182 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
1751 2183
@@ -1756,12 +2188,20 @@ private:
1756 return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); 2188 return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
1757 } 2189 }
1758 2190
2191 std::string GetImage(const Image& image) const {
2192 return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
2193 }
2194
2195 void EmitIfdefIsBuffer(const Sampler& sampler) {
2196 code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex());
2197 }
2198
1759 std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { 2199 std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const {
1760 return fmt::format("{}_{}_{}", name, index, suffix); 2200 return fmt::format("{}_{}_{}", name, index, suffix);
1761 } 2201 }
1762 2202
1763 u32 GetNumPhysicalInputAttributes() const { 2203 u32 GetNumPhysicalInputAttributes() const {
1764 return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); 2204 return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
1765 } 2205 }
1766 2206
1767 u32 GetNumPhysicalAttributes() const { 2207 u32 GetNumPhysicalAttributes() const {
@@ -1774,7 +2214,7 @@ private:
1774 2214
1775 const Device& device; 2215 const Device& device;
1776 const ShaderIR& ir; 2216 const ShaderIR& ir;
1777 const ShaderStage stage; 2217 const ProgramType stage;
1778 const std::string suffix; 2218 const std::string suffix;
1779 const Header header; 2219 const Header header;
1780 2220
@@ -1785,27 +2225,19 @@ private:
1785 2225
1786std::string GetCommonDeclarations() { 2226std::string GetCommonDeclarations() {
1787 return fmt::format( 2227 return fmt::format(
1788 "#define MAX_CONSTBUFFER_ELEMENTS {}\n"
1789 "#define ftoi floatBitsToInt\n" 2228 "#define ftoi floatBitsToInt\n"
1790 "#define ftou floatBitsToUint\n" 2229 "#define ftou floatBitsToUint\n"
1791 "#define itof intBitsToFloat\n" 2230 "#define itof intBitsToFloat\n"
1792 "#define utof uintBitsToFloat\n\n" 2231 "#define utof uintBitsToFloat\n\n"
1793 "float fromHalf2(vec2 pair) {{\n" 2232 "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
1794 " return utof(packHalf2x16(pair));\n"
1795 "}}\n\n"
1796 "vec2 toHalf2(float value) {{\n"
1797 " return unpackHalf2x16(ftou(value));\n"
1798 "}}\n\n"
1799 "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
1800 " bvec2 is_nan1 = isnan(pair1);\n" 2233 " bvec2 is_nan1 = isnan(pair1);\n"
1801 " bvec2 is_nan2 = isnan(pair2);\n" 2234 " bvec2 is_nan2 = isnan(pair2);\n"
1802 " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " 2235 " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
1803 "is_nan2.y);\n" 2236 "is_nan2.y);\n"
1804 "}}\n", 2237 "}}\n\n");
1805 MAX_CONSTBUFFER_ELEMENTS);
1806} 2238}
1807 2239
1808ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, 2240ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
1809 const std::string& suffix) { 2241 const std::string& suffix) {
1810 GLSLDecompiler decompiler(device, ir, stage, suffix); 2242 GLSLDecompiler decompiler(device, ir, stage, suffix);
1811 decompiler.Decompile(); 2243 decompiler.Decompile();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index c1569e737..2ea02f5bf 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -12,14 +12,26 @@
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/shader/shader_ir.h" 13#include "video_core/shader/shader_ir.h"
14 14
15namespace OpenGL {
16class Device;
17}
18
19namespace VideoCommon::Shader { 15namespace VideoCommon::Shader {
20class ShaderIR; 16class ShaderIR;
21} 17}
22 18
19namespace OpenGL {
20
21class Device;
22
23enum class ProgramType : u32 {
24 VertexA = 0,
25 VertexB = 1,
26 TessellationControl = 2,
27 TessellationEval = 3,
28 Geometry = 4,
29 Fragment = 5,
30 Compute = 6
31};
32
33} // namespace OpenGL
34
23namespace OpenGL::GLShader { 35namespace OpenGL::GLShader {
24 36
25struct ShaderEntries; 37struct ShaderEntries;
@@ -27,6 +39,7 @@ struct ShaderEntries;
27using Maxwell = Tegra::Engines::Maxwell3D::Regs; 39using Maxwell = Tegra::Engines::Maxwell3D::Regs;
28using ProgramResult = std::pair<std::string, ShaderEntries>; 40using ProgramResult = std::pair<std::string, ShaderEntries>;
29using SamplerEntry = VideoCommon::Shader::Sampler; 41using SamplerEntry = VideoCommon::Shader::Sampler;
42using ImageEntry = VideoCommon::Shader::Image;
30 43
31class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { 44class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
32public: 45public:
@@ -74,14 +87,16 @@ struct ShaderEntries {
74 std::vector<ConstBufferEntry> const_buffers; 87 std::vector<ConstBufferEntry> const_buffers;
75 std::vector<SamplerEntry> samplers; 88 std::vector<SamplerEntry> samplers;
76 std::vector<SamplerEntry> bindless_samplers; 89 std::vector<SamplerEntry> bindless_samplers;
90 std::vector<ImageEntry> images;
77 std::vector<GlobalMemoryEntry> global_memory_entries; 91 std::vector<GlobalMemoryEntry> global_memory_entries;
78 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 92 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
93 bool shader_viewport_layer_array{};
79 std::size_t shader_length{}; 94 std::size_t shader_length{};
80}; 95};
81 96
82std::string GetCommonDeclarations(); 97std::string GetCommonDeclarations();
83 98
84ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 99ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
85 Maxwell::ShaderStage stage, const std::string& suffix); 100 ProgramType stage, const std::string& suffix);
86 101
87} // namespace OpenGL::GLShader 102} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index ee4a45ca2..f141c4e3b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -34,11 +34,11 @@ enum class PrecompiledEntryKind : u32 {
34 Dump, 34 Dump,
35}; 35};
36 36
37constexpr u32 NativeVersion = 1; 37constexpr u32 NativeVersion = 4;
38 38
39// Making sure sizes doesn't change by accident 39// Making sure sizes doesn't change by accident
40static_assert(sizeof(BaseBindings) == 12); 40static_assert(sizeof(BaseBindings) == 16);
41static_assert(sizeof(ShaderDiskCacheUsage) == 24); 41static_assert(sizeof(ShaderDiskCacheUsage) == 40);
42 42
43namespace { 43namespace {
44 44
@@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
51 51
52} // namespace 52} // namespace
53 53
54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, 54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
55 u32 program_code_size, u32 program_code_size_b, 55 u32 program_code_size, u32 program_code_size_b,
56 ProgramCode program_code, ProgramCode program_code_b) 56 ProgramCode program_code, ProgramCode program_code_b)
57 : unique_identifier{unique_identifier}, program_type{program_type}, 57 : unique_identifier{unique_identifier}, program_type{program_type},
@@ -332,11 +332,37 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
332 static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); 332 static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
333 } 333 }
334 334
335 u32 images_count{};
336 if (!LoadObjectFromPrecompiled(images_count)) {
337 return {};
338 }
339 for (u32 i = 0; i < images_count; ++i) {
340 u64 offset{};
341 u64 index{};
342 u32 type{};
343 u8 is_bindless{};
344 u8 is_written{};
345 u8 is_read{};
346 u8 is_size_known{};
347 u32 size{};
348 if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
349 !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) ||
350 !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) ||
351 !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) {
352 return {};
353 }
354 entry.entries.images.emplace_back(
355 static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
356 static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0,
357 is_read != 0,
358 is_size_known ? std::make_optional(static_cast<Tegra::Shader::ImageAtomicSize>(size))
359 : std::nullopt);
360 }
361
335 u32 global_memory_count{}; 362 u32 global_memory_count{};
336 if (!LoadObjectFromPrecompiled(global_memory_count)) { 363 if (!LoadObjectFromPrecompiled(global_memory_count)) {
337 return {}; 364 return {};
338 } 365 }
339
340 for (u32 i = 0; i < global_memory_count; ++i) { 366 for (u32 i = 0; i < global_memory_count; ++i) {
341 u32 cbuf_index{}; 367 u32 cbuf_index{};
342 u32 cbuf_offset{}; 368 u32 cbuf_offset{};
@@ -356,11 +382,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
356 } 382 }
357 } 383 }
358 384
385 bool shader_viewport_layer_array{};
386 if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) {
387 return {};
388 }
389 entry.entries.shader_viewport_layer_array = shader_viewport_layer_array;
390
359 u64 shader_length{}; 391 u64 shader_length{};
360 if (!LoadObjectFromPrecompiled(shader_length)) { 392 if (!LoadObjectFromPrecompiled(shader_length)) {
361 return {}; 393 return {};
362 } 394 }
363
364 entry.entries.shader_length = static_cast<std::size_t>(shader_length); 395 entry.entries.shader_length = static_cast<std::size_t>(shader_length);
365 396
366 return entry; 397 return entry;
@@ -400,6 +431,22 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
400 } 431 }
401 } 432 }
402 433
434 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
435 return false;
436 }
437 for (const auto& image : entries.images) {
438 const u32 size = image.IsSizeKnown() ? static_cast<u32>(image.GetSize()) : 0U;
439 if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
440 !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
441 !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
442 !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) ||
443 !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) ||
444 !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) ||
445 !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) {
446 return false;
447 }
448 }
449
403 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { 450 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
404 return false; 451 return false;
405 } 452 }
@@ -417,6 +464,10 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
417 } 464 }
418 } 465 }
419 466
467 if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) {
468 return false;
469 }
470
420 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { 471 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
421 return false; 472 return false;
422 } 473 }
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index ecd72ba58..cc8bbd61e 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <bitset>
7#include <optional> 8#include <optional>
8#include <string> 9#include <string>
9#include <tuple> 10#include <tuple>
@@ -17,7 +18,6 @@
17#include "common/assert.h" 18#include "common/assert.h"
18#include "common/common_types.h" 19#include "common/common_types.h"
19#include "core/file_sys/vfs_vector.h" 20#include "core/file_sys/vfs_vector.h"
20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/renderer_opengl/gl_shader_gen.h" 21#include "video_core/renderer_opengl/gl_shader_gen.h"
22 22
23namespace Core { 23namespace Core {
@@ -30,22 +30,23 @@ class IOFile;
30 30
31namespace OpenGL { 31namespace OpenGL {
32 32
33using ProgramCode = std::vector<u64>;
34using Maxwell = Tegra::Engines::Maxwell3D::Regs;
35
36struct ShaderDiskCacheUsage; 33struct ShaderDiskCacheUsage;
37struct ShaderDiskCacheDump; 34struct ShaderDiskCacheDump;
38 35
36using ProgramCode = std::vector<u64>;
39using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; 37using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
38using TextureBufferUsage = std::bitset<64>;
40 39
41/// Allocated bindings used by an OpenGL shader program 40/// Allocated bindings used by an OpenGL shader program
42struct BaseBindings { 41struct BaseBindings {
43 u32 cbuf{}; 42 u32 cbuf{};
44 u32 gmem{}; 43 u32 gmem{};
45 u32 sampler{}; 44 u32 sampler{};
45 u32 image{};
46 46
47 bool operator==(const BaseBindings& rhs) const { 47 bool operator==(const BaseBindings& rhs) const {
48 return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler); 48 return std::tie(cbuf, gmem, sampler, image) ==
49 std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
49 } 50 }
50 51
51 bool operator!=(const BaseBindings& rhs) const { 52 bool operator!=(const BaseBindings& rhs) const {
@@ -53,15 +54,29 @@ struct BaseBindings {
53 } 54 }
54}; 55};
55 56
56/// Describes how a shader is used 57/// Describes the different variants a single program can be compiled.
58struct ProgramVariant {
59 BaseBindings base_bindings;
60 GLenum primitive_mode{};
61 TextureBufferUsage texture_buffer_usage{};
62
63 bool operator==(const ProgramVariant& rhs) const {
64 return std::tie(base_bindings, primitive_mode, texture_buffer_usage) ==
65 std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage);
66 }
67
68 bool operator!=(const ProgramVariant& rhs) const {
69 return !operator==(rhs);
70 }
71};
72
73/// Describes how a shader is used.
57struct ShaderDiskCacheUsage { 74struct ShaderDiskCacheUsage {
58 u64 unique_identifier{}; 75 u64 unique_identifier{};
59 BaseBindings bindings; 76 ProgramVariant variant;
60 GLenum primitive{};
61 77
62 bool operator==(const ShaderDiskCacheUsage& rhs) const { 78 bool operator==(const ShaderDiskCacheUsage& rhs) const {
63 return std::tie(unique_identifier, bindings, primitive) == 79 return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant);
64 std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive);
65 } 80 }
66 81
67 bool operator!=(const ShaderDiskCacheUsage& rhs) const { 82 bool operator!=(const ShaderDiskCacheUsage& rhs) const {
@@ -76,7 +91,19 @@ namespace std {
76template <> 91template <>
77struct hash<OpenGL::BaseBindings> { 92struct hash<OpenGL::BaseBindings> {
78 std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { 93 std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
79 return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16; 94 return static_cast<std::size_t>(bindings.cbuf) ^
95 (static_cast<std::size_t>(bindings.gmem) << 8) ^
96 (static_cast<std::size_t>(bindings.sampler) << 16) ^
97 (static_cast<std::size_t>(bindings.image) << 24);
98 }
99};
100
101template <>
102struct hash<OpenGL::ProgramVariant> {
103 std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
104 return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^
105 std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage) ^
106 (static_cast<std::size_t>(variant.primitive_mode) << 6);
80 } 107 }
81}; 108};
82 109
@@ -84,7 +111,7 @@ template <>
84struct hash<OpenGL::ShaderDiskCacheUsage> { 111struct hash<OpenGL::ShaderDiskCacheUsage> {
85 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { 112 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
86 return static_cast<std::size_t>(usage.unique_identifier) ^ 113 return static_cast<std::size_t>(usage.unique_identifier) ^
87 std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16; 114 std::hash<OpenGL::ProgramVariant>()(usage.variant);
88 } 115 }
89}; 116};
90 117
@@ -95,7 +122,7 @@ namespace OpenGL {
95/// Describes a shader how it's used by the guest GPU 122/// Describes a shader how it's used by the guest GPU
96class ShaderDiskCacheRaw { 123class ShaderDiskCacheRaw {
97public: 124public:
98 explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, 125 explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
99 u32 program_code_size, u32 program_code_size_b, 126 u32 program_code_size, u32 program_code_size_b,
100 ProgramCode program_code, ProgramCode program_code_b); 127 ProgramCode program_code, ProgramCode program_code_b);
101 ShaderDiskCacheRaw(); 128 ShaderDiskCacheRaw();
@@ -110,30 +137,13 @@ public:
110 } 137 }
111 138
112 bool HasProgramA() const { 139 bool HasProgramA() const {
113 return program_type == Maxwell::ShaderProgram::VertexA; 140 return program_type == ProgramType::VertexA;
114 } 141 }
115 142
116 Maxwell::ShaderProgram GetProgramType() const { 143 ProgramType GetProgramType() const {
117 return program_type; 144 return program_type;
118 } 145 }
119 146
120 Maxwell::ShaderStage GetProgramStage() const {
121 switch (program_type) {
122 case Maxwell::ShaderProgram::VertexA:
123 case Maxwell::ShaderProgram::VertexB:
124 return Maxwell::ShaderStage::Vertex;
125 case Maxwell::ShaderProgram::TesselationControl:
126 return Maxwell::ShaderStage::TesselationControl;
127 case Maxwell::ShaderProgram::TesselationEval:
128 return Maxwell::ShaderStage::TesselationEval;
129 case Maxwell::ShaderProgram::Geometry:
130 return Maxwell::ShaderStage::Geometry;
131 case Maxwell::ShaderProgram::Fragment:
132 return Maxwell::ShaderStage::Fragment;
133 }
134 UNREACHABLE();
135 }
136
137 const ProgramCode& GetProgramCode() const { 147 const ProgramCode& GetProgramCode() const {
138 return program_code; 148 return program_code;
139 } 149 }
@@ -144,7 +154,7 @@ public:
144 154
145private: 155private:
146 u64 unique_identifier{}; 156 u64 unique_identifier{};
147 Maxwell::ShaderProgram program_type{}; 157 ProgramType program_type{};
148 u32 program_code_size{}; 158 u32 program_code_size{};
149 u32 program_code_size_b{}; 159 u32 program_code_size_b{};
150 160
@@ -275,26 +285,17 @@ private:
275 return LoadArrayFromPrecompiled(&object, 1); 285 return LoadArrayFromPrecompiled(&object, 1);
276 } 286 }
277 287
278 bool LoadObjectFromPrecompiled(bool& object) {
279 u8 value;
280 const bool read_ok = LoadArrayFromPrecompiled(&value, 1);
281 if (!read_ok) {
282 return false;
283 }
284
285 object = value != 0;
286 return true;
287 }
288
289 // Core system
290 Core::System& system; 288 Core::System& system;
291 // Stored transferable shaders 289
292 std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; 290 // Stores whole precompiled cache which will be read from or saved to the precompiled chache
293 // Stores whole precompiled cache which will be read from/saved to the precompiled cache file 291 // file
294 FileSys::VectorVfsFile precompiled_cache_virtual_file; 292 FileSys::VectorVfsFile precompiled_cache_virtual_file;
295 // Stores the current offset of the precompiled cache file for IO purposes 293 // Stores the current offset of the precompiled cache file for IO purposes
296 std::size_t precompiled_cache_virtual_file_offset = 0; 294 std::size_t precompiled_cache_virtual_file_offset = 0;
297 295
296 // Stored transferable shaders
297 std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
298
298 // The cache has been loaded at boot 299 // The cache has been loaded at boot
299 bool tried_to_load{}; 300 bool tried_to_load{};
300}; 301};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 9148629ec..3a8d9e1da 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D;
14using VideoCommon::Shader::ProgramCode; 14using VideoCommon::Shader::ProgramCode;
15using VideoCommon::Shader::ShaderIR; 15using VideoCommon::Shader::ShaderIR;
16 16
17static constexpr u32 PROGRAM_OFFSET{10}; 17static constexpr u32 PROGRAM_OFFSET = 10;
18static constexpr u32 COMPUTE_OFFSET = 0;
18 19
19ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { 20ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
20 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 21 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
@@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
29}; 30};
30 31
31)"; 32)";
32 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
33 ProgramResult program =
34 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
35 33
34 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
35 const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
36 ProgramResult program = Decompile(device, program_ir, stage, "vertex");
36 out += program.first; 37 out += program.first;
37 38
38 if (setup.IsDualProgram()) { 39 if (setup.IsDualProgram()) {
39 const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); 40 const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b);
40 ProgramResult program_b = 41 ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
41 Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
42
43 out += program_b.first; 42 out += program_b.first;
44 } 43 }
45 44
@@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
80}; 79};
81 80
82)"; 81)";
83 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); 82
84 ProgramResult program = 83 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
85 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); 84 ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
86 out += program.first; 85 out += program.first;
87 86
88 out += R"( 87 out += R"(
@@ -115,10 +114,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
115}; 114};
116 115
117)"; 116)";
118 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); 117 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
119 ProgramResult program = 118 ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
120 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
121
122 out += program.first; 119 out += program.first;
123 120
124 out += R"( 121 out += R"(
@@ -130,4 +127,22 @@ void main() {
130 return {std::move(out), std::move(program.second)}; 127 return {std::move(out), std::move(program.second)};
131} 128}
132 129
130ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
131 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
132
133 std::string out = "// Shader Unique Id: CS" + id + "\n\n";
134 out += GetCommonDeclarations();
135
136 const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a);
137 ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
138 out += program.first;
139
140 out += R"(
141void main() {
142 execute_compute();
143}
144)";
145 return {std::move(out), std::move(program.second)};
146}
147
133} // namespace OpenGL::GLShader 148} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 0536c8a03..3833e88ab 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -27,6 +27,8 @@ struct ShaderSetup {
27 ProgramCode code; 27 ProgramCode code;
28 ProgramCode code_b; // Used for dual vertex shaders 28 ProgramCode code_b; // Used for dual vertex shaders
29 u64 unique_identifier; 29 u64 unique_identifier;
30 std::size_t size_a;
31 std::size_t size_b;
30 } program; 32 } program;
31 33
32 /// Used in scenarios where we have a dual vertex shaders 34 /// Used in scenarios where we have a dual vertex shaders
@@ -52,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se
52/// Generates the GLSL fragment shader program source code for the given FS program 54/// Generates the GLSL fragment shader program source code for the given FS program
53ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); 55ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
54 56
57/// Generates the GLSL compute shader program source code for the given CS program
58ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
59
55} // namespace OpenGL::GLShader 60} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 5f3fe067e..9e74eda0d 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -10,21 +10,25 @@
10 10
11namespace OpenGL::GLShader { 11namespace OpenGL::GLShader {
12 12
13GLuint LoadShader(const char* source, GLenum type) { 13namespace {
14 const char* debug_type; 14const char* GetStageDebugName(GLenum type) {
15 switch (type) { 15 switch (type) {
16 case GL_VERTEX_SHADER: 16 case GL_VERTEX_SHADER:
17 debug_type = "vertex"; 17 return "vertex";
18 break;
19 case GL_GEOMETRY_SHADER: 18 case GL_GEOMETRY_SHADER:
20 debug_type = "geometry"; 19 return "geometry";
21 break;
22 case GL_FRAGMENT_SHADER: 20 case GL_FRAGMENT_SHADER:
23 debug_type = "fragment"; 21 return "fragment";
24 break; 22 case GL_COMPUTE_SHADER:
25 default: 23 return "compute";
26 UNREACHABLE();
27 } 24 }
25 UNIMPLEMENTED();
26 return "unknown";
27}
28} // Anonymous namespace
29
30GLuint LoadShader(const char* source, GLenum type) {
31 const char* debug_type = GetStageDebugName(type);
28 const GLuint shader_id = glCreateShader(type); 32 const GLuint shader_id = glCreateShader(type);
29 glShaderSource(shader_id, 1, &source, nullptr); 33 glShaderSource(shader_id, 1, &source, nullptr);
30 LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); 34 LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index d86e137ac..6eabf4fac 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -6,8 +6,11 @@
6#include <glad/glad.h> 6#include <glad/glad.h>
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "common/microprofile.h"
9#include "video_core/renderer_opengl/gl_state.h" 10#include "video_core/renderer_opengl/gl_state.h"
10 11
12MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
13
11namespace OpenGL { 14namespace OpenGL {
12 15
13using Maxwell = Tegra::Engines::Maxwell3D::Regs; 16using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -31,6 +34,25 @@ bool UpdateTie(T1 current_value, const T2 new_value) {
31 return changed; 34 return changed;
32} 35}
33 36
37template <typename T>
38std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) {
39 std::optional<std::size_t> first;
40 std::size_t last;
41 for (std::size_t i = 0; i < std::size(current_values); ++i) {
42 if (!UpdateValue(current_values[i], new_values[i])) {
43 continue;
44 }
45 if (!first) {
46 first = i;
47 }
48 last = i;
49 }
50 if (!first) {
51 return std::nullopt;
52 }
53 return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1));
54}
55
34void Enable(GLenum cap, bool enable) { 56void Enable(GLenum cap, bool enable) {
35 if (enable) { 57 if (enable) {
36 glEnable(cap); 58 glEnable(cap);
@@ -131,10 +153,6 @@ OpenGLState::OpenGLState() {
131 logic_op.enabled = false; 153 logic_op.enabled = false;
132 logic_op.operation = GL_COPY; 154 logic_op.operation = GL_COPY;
133 155
134 for (auto& texture_unit : texture_units) {
135 texture_unit.Reset();
136 }
137
138 draw.read_framebuffer = 0; 156 draw.read_framebuffer = 0;
139 draw.draw_framebuffer = 0; 157 draw.draw_framebuffer = 0;
140 draw.vertex_array = 0; 158 draw.vertex_array = 0;
@@ -162,6 +180,25 @@ OpenGLState::OpenGLState() {
162 alpha_test.ref = 0.0f; 180 alpha_test.ref = 0.0f;
163} 181}
164 182
183void OpenGLState::SetDefaultViewports() {
184 for (auto& item : viewports) {
185 item.x = 0;
186 item.y = 0;
187 item.width = 0;
188 item.height = 0;
189 item.depth_range_near = 0.0f;
190 item.depth_range_far = 1.0f;
191 item.scissor.enabled = false;
192 item.scissor.x = 0;
193 item.scissor.y = 0;
194 item.scissor.width = 0;
195 item.scissor.height = 0;
196 }
197
198 depth_clamp.far_plane = false;
199 depth_clamp.near_plane = false;
200}
201
165void OpenGLState::ApplyDefaultState() { 202void OpenGLState::ApplyDefaultState() {
166 glEnable(GL_BLEND); 203 glEnable(GL_BLEND);
167 glDisable(GL_FRAMEBUFFER_SRGB); 204 glDisable(GL_FRAMEBUFFER_SRGB);
@@ -474,56 +511,25 @@ void OpenGLState::ApplyAlphaTest() const {
474} 511}
475 512
476void OpenGLState::ApplyTextures() const { 513void OpenGLState::ApplyTextures() const {
477 bool has_delta{}; 514 if (const auto update = UpdateArray(cur_state.textures, textures)) {
478 std::size_t first{}; 515 glBindTextures(update->first, update->second, textures.data() + update->first);
479 std::size_t last{};
480 std::array<GLuint, Maxwell::NumTextureSamplers> textures;
481
482 for (std::size_t i = 0; i < std::size(texture_units); ++i) {
483 const auto& texture_unit = texture_units[i];
484 auto& cur_state_texture_unit = cur_state.texture_units[i];
485 textures[i] = texture_unit.texture;
486 if (cur_state_texture_unit.texture == textures[i]) {
487 continue;
488 }
489 cur_state_texture_unit.texture = textures[i];
490 if (!has_delta) {
491 first = i;
492 has_delta = true;
493 }
494 last = i;
495 }
496 if (has_delta) {
497 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
498 textures.data() + first);
499 } 516 }
500} 517}
501 518
502void OpenGLState::ApplySamplers() const { 519void OpenGLState::ApplySamplers() const {
503 bool has_delta{}; 520 if (const auto update = UpdateArray(cur_state.samplers, samplers)) {
504 std::size_t first{}; 521 glBindSamplers(update->first, update->second, samplers.data() + update->first);
505 std::size_t last{};
506 std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
507
508 for (std::size_t i = 0; i < std::size(samplers); ++i) {
509 samplers[i] = texture_units[i].sampler;
510 if (cur_state.texture_units[i].sampler == texture_units[i].sampler) {
511 continue;
512 }
513 cur_state.texture_units[i].sampler = texture_units[i].sampler;
514 if (!has_delta) {
515 first = i;
516 has_delta = true;
517 }
518 last = i;
519 } 522 }
520 if (has_delta) { 523}
521 glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), 524
522 samplers.data() + first); 525void OpenGLState::ApplyImages() const {
526 if (const auto update = UpdateArray(cur_state.images, images)) {
527 glBindImageTextures(update->first, update->second, images.data() + update->first);
523 } 528 }
524} 529}
525 530
526void OpenGLState::Apply() const { 531void OpenGLState::Apply() {
532 MICROPROFILE_SCOPE(OpenGL_State);
527 ApplyFramebufferState(); 533 ApplyFramebufferState();
528 ApplyVertexArrayState(); 534 ApplyVertexArrayState();
529 ApplyShaderProgram(); 535 ApplyShaderProgram();
@@ -532,19 +538,32 @@ void OpenGLState::Apply() const {
532 ApplyPointSize(); 538 ApplyPointSize();
533 ApplyFragmentColorClamp(); 539 ApplyFragmentColorClamp();
534 ApplyMultisample(); 540 ApplyMultisample();
541 if (dirty.color_mask) {
542 ApplyColorMask();
543 dirty.color_mask = false;
544 }
535 ApplyDepthClamp(); 545 ApplyDepthClamp();
536 ApplyColorMask();
537 ApplyViewport(); 546 ApplyViewport();
538 ApplyStencilTest(); 547 if (dirty.stencil_state) {
548 ApplyStencilTest();
549 dirty.stencil_state = false;
550 }
539 ApplySRgb(); 551 ApplySRgb();
540 ApplyCulling(); 552 ApplyCulling();
541 ApplyDepth(); 553 ApplyDepth();
542 ApplyPrimitiveRestart(); 554 ApplyPrimitiveRestart();
543 ApplyBlending(); 555 if (dirty.blend_state) {
556 ApplyBlending();
557 dirty.blend_state = false;
558 }
544 ApplyLogicOp(); 559 ApplyLogicOp();
545 ApplyTextures(); 560 ApplyTextures();
546 ApplySamplers(); 561 ApplySamplers();
547 ApplyPolygonOffset(); 562 ApplyImages();
563 if (dirty.polygon_offset) {
564 ApplyPolygonOffset();
565 dirty.polygon_offset = false;
566 }
548 ApplyAlphaTest(); 567 ApplyAlphaTest();
549} 568}
550 569
@@ -571,18 +590,18 @@ void OpenGLState::EmulateViewportWithScissor() {
571} 590}
572 591
573OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { 592OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
574 for (auto& unit : texture_units) { 593 for (auto& texture : textures) {
575 if (unit.texture == handle) { 594 if (texture == handle) {
576 unit.Unbind(); 595 texture = 0;
577 } 596 }
578 } 597 }
579 return *this; 598 return *this;
580} 599}
581 600
582OpenGLState& OpenGLState::ResetSampler(GLuint handle) { 601OpenGLState& OpenGLState::ResetSampler(GLuint handle) {
583 for (auto& unit : texture_units) { 602 for (auto& sampler : samplers) {
584 if (unit.sampler == handle) { 603 if (sampler == handle) {
585 unit.sampler = 0; 604 sampler = 0;
586 } 605 }
587 } 606 }
588 return *this; 607 return *this;
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b0140495d..949b13051 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -118,21 +118,9 @@ public:
118 GLenum operation; 118 GLenum operation;
119 } logic_op; 119 } logic_op;
120 120
121 // 3 texture units - one for each that is used in PICA fragment shader emulation 121 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{};
122 struct TextureUnit { 122 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{};
123 GLuint texture; // GL_TEXTURE_BINDING_2D 123 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{};
124 GLuint sampler; // GL_SAMPLER_BINDING
125
126 void Unbind() {
127 texture = 0;
128 }
129
130 void Reset() {
131 Unbind();
132 sampler = 0;
133 }
134 };
135 std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;
136 124
137 struct { 125 struct {
138 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING 126 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
@@ -195,8 +183,9 @@ public:
195 s_rgb_used = false; 183 s_rgb_used = false;
196 } 184 }
197 185
186 void SetDefaultViewports();
198 /// Apply this state as the current OpenGL state 187 /// Apply this state as the current OpenGL state
199 void Apply() const; 188 void Apply();
200 189
201 void ApplyFramebufferState() const; 190 void ApplyFramebufferState() const;
202 void ApplyVertexArrayState() const; 191 void ApplyVertexArrayState() const;
@@ -219,6 +208,7 @@ public:
219 void ApplyLogicOp() const; 208 void ApplyLogicOp() const;
220 void ApplyTextures() const; 209 void ApplyTextures() const;
221 void ApplySamplers() const; 210 void ApplySamplers() const;
211 void ApplyImages() const;
222 void ApplyDepthClamp() const; 212 void ApplyDepthClamp() const;
223 void ApplyPolygonOffset() const; 213 void ApplyPolygonOffset() const;
224 void ApplyAlphaTest() const; 214 void ApplyAlphaTest() const;
@@ -237,11 +227,41 @@ public:
237 /// Viewport does not affects glClearBuffer so emulate viewport using scissor test 227 /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
238 void EmulateViewportWithScissor(); 228 void EmulateViewportWithScissor();
239 229
230 void MarkDirtyBlendState() {
231 dirty.blend_state = true;
232 }
233
234 void MarkDirtyStencilState() {
235 dirty.stencil_state = true;
236 }
237
238 void MarkDirtyPolygonOffset() {
239 dirty.polygon_offset = true;
240 }
241
242 void MarkDirtyColorMask() {
243 dirty.color_mask = true;
244 }
245
246 void AllDirty() {
247 dirty.blend_state = true;
248 dirty.stencil_state = true;
249 dirty.polygon_offset = true;
250 dirty.color_mask = true;
251 }
252
240private: 253private:
241 static OpenGLState cur_state; 254 static OpenGLState cur_state;
242 255
243 // Workaround for sRGB problems caused by QT not supporting srgb output 256 // Workaround for sRGB problems caused by QT not supporting srgb output
244 static bool s_rgb_used; 257 static bool s_rgb_used;
258 struct {
259 bool blend_state;
260 bool stencil_state;
261 bool viewport_state;
262 bool polygon_offset;
263 bool color_mask;
264 } dirty{};
245}; 265};
246 266
247} // namespace OpenGL 267} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index d0b14b3f6..35ba334e4 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -15,7 +15,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
15 15
16namespace OpenGL { 16namespace OpenGL {
17 17
18OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent) 18OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
19 bool use_persistent)
19 : buffer_size(size) { 20 : buffer_size(size) {
20 gl_buffer.Create(); 21 gl_buffer.Create();
21 22
@@ -29,7 +30,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
29 allocate_size *= 2; 30 allocate_size *= 2;
30 } 31 }
31 32
32 if (GLAD_GL_ARB_buffer_storage) { 33 if (use_persistent) {
33 persistent = true; 34 persistent = true;
34 coherent = prefer_coherent; 35 coherent = prefer_coherent;
35 const GLbitfield flags = 36 const GLbitfield flags =
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 3d18ecb4d..f8383cbd4 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -13,7 +13,8 @@ namespace OpenGL {
13 13
14class OGLStreamBuffer : private NonCopyable { 14class OGLStreamBuffer : private NonCopyable {
15public: 15public:
16 explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false); 16 explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
17 bool use_persistent = true);
17 ~OGLStreamBuffer(); 18 ~OGLStreamBuffer();
18 19
19 GLuint GetHandle() const; 20 GLuint GetHandle() const;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
new file mode 100644
index 000000000..4f135fe03
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -0,0 +1,624 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/bit_util.h"
7#include "common/common_types.h"
8#include "common/microprofile.h"
9#include "common/scope_exit.h"
10#include "core/core.h"
11#include "video_core/morton.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h"
14#include "video_core/renderer_opengl/gl_texture_cache.h"
15#include "video_core/renderer_opengl/utils.h"
16#include "video_core/texture_cache/surface_base.h"
17#include "video_core/texture_cache/texture_cache.h"
18#include "video_core/textures/convert.h"
19#include "video_core/textures/texture.h"
20
21namespace OpenGL {
22
23using Tegra::Texture::SwizzleSource;
24using VideoCore::MortonSwizzleMode;
25
26using VideoCore::Surface::ComponentType;
27using VideoCore::Surface::PixelFormat;
28using VideoCore::Surface::SurfaceCompression;
29using VideoCore::Surface::SurfaceTarget;
30using VideoCore::Surface::SurfaceType;
31
32MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
33MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
34MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
35 MP_RGB(128, 192, 128));
36
37namespace {
38
39struct FormatTuple {
40 GLint internal_format;
41 GLenum format;
42 GLenum type;
43 ComponentType component_type;
44 bool compressed;
45};
46
47constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
48 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
49 {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
50 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
51 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
52 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
53 false}, // A2B10G10R10U
54 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
55 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U
56 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
57 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
58 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U
59 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
60 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
61 false}, // R11FG11FB10F
62 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
63 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
64 true}, // DXT1
65 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
66 true}, // DXT23
67 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
68 true}, // DXT45
69 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
70 {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
71 true}, // DXN2UNORM
72 {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
73 {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
74 true}, // BC7U
75 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
76 true}, // BC6H_UF16
77 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
78 true}, // BC6H_SF16
79 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
80 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
81 {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F
82 {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F
83 {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F
84 {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F
85 {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U
86 {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S
87 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI
88 {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I
89 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16
90 {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F
91 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI
92 {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I
93 {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
94 {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
95 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
96 false}, // RGBA8_SRGB
97 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
98 {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
99 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
100 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
101 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
102 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
103 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
104 {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
105 // Compressed sRGB formats
106 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
107 true}, // DXT1_SRGB
108 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
109 true}, // DXT23_SRGB
110 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
111 true}, // DXT45_SRGB
112 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
113 true}, // BC7U_SRGB
114 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
115 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
116 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
117 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
118 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
119 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
120 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
121 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
122
123 // Depth formats
124 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
125 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
126 false}, // Z16
127
128 // DepthStencil formats
129 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
130 false}, // Z24S8
131 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
132 false}, // S8Z24
133 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
134 ComponentType::Float, false}, // Z32FS8
135}};
136
137const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
138 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
139 const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
140 return format;
141}
142
143GLenum GetTextureTarget(const SurfaceTarget& target) {
144 switch (target) {
145 case SurfaceTarget::TextureBuffer:
146 return GL_TEXTURE_BUFFER;
147 case SurfaceTarget::Texture1D:
148 return GL_TEXTURE_1D;
149 case SurfaceTarget::Texture2D:
150 return GL_TEXTURE_2D;
151 case SurfaceTarget::Texture3D:
152 return GL_TEXTURE_3D;
153 case SurfaceTarget::Texture1DArray:
154 return GL_TEXTURE_1D_ARRAY;
155 case SurfaceTarget::Texture2DArray:
156 return GL_TEXTURE_2D_ARRAY;
157 case SurfaceTarget::TextureCubemap:
158 return GL_TEXTURE_CUBE_MAP;
159 case SurfaceTarget::TextureCubeArray:
160 return GL_TEXTURE_CUBE_MAP_ARRAY;
161 }
162 UNREACHABLE();
163 return {};
164}
165
166GLint GetSwizzleSource(SwizzleSource source) {
167 switch (source) {
168 case SwizzleSource::Zero:
169 return GL_ZERO;
170 case SwizzleSource::R:
171 return GL_RED;
172 case SwizzleSource::G:
173 return GL_GREEN;
174 case SwizzleSource::B:
175 return GL_BLUE;
176 case SwizzleSource::A:
177 return GL_ALPHA;
178 case SwizzleSource::OneInt:
179 case SwizzleSource::OneFloat:
180 return GL_ONE;
181 }
182 UNREACHABLE();
183 return GL_NONE;
184}
185
186void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
187 if (params.IsBuffer()) {
188 return;
189 }
190 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
191 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
192 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
193 glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
194 glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1);
195 if (params.num_levels == 1) {
196 glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
197 }
198}
199
200OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format,
201 OGLBuffer& texture_buffer) {
202 OGLTexture texture;
203 texture.Create(target);
204
205 switch (params.target) {
206 case SurfaceTarget::Texture1D:
207 glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width);
208 break;
209 case SurfaceTarget::TextureBuffer:
210 texture_buffer.Create();
211 glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
212 nullptr, GL_DYNAMIC_STORAGE_BIT);
213 glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
214 break;
215 case SurfaceTarget::Texture2D:
216 case SurfaceTarget::TextureCubemap:
217 glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
218 params.height);
219 break;
220 case SurfaceTarget::Texture3D:
221 case SurfaceTarget::Texture2DArray:
222 case SurfaceTarget::TextureCubeArray:
223 glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width,
224 params.height, params.depth);
225 break;
226 default:
227 UNREACHABLE();
228 }
229
230 ApplyTextureDefaults(params, texture.handle);
231
232 return texture;
233}
234
235} // Anonymous namespace
236
237CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
238 : VideoCommon::SurfaceBase<View>(gpu_addr, params) {
239 const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)};
240 internal_format = tuple.internal_format;
241 format = tuple.format;
242 type = tuple.type;
243 is_compressed = tuple.compressed;
244 target = GetTextureTarget(params.target);
245 texture = CreateTexture(params, target, internal_format, texture_buffer);
246 DecorateSurfaceName();
247 main_view = CreateViewInner(
248 ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
249 true);
250}
251
252CachedSurface::~CachedSurface() = default;
253
254void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
255 MICROPROFILE_SCOPE(OpenGL_Texture_Download);
256
257 SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
258
259 for (u32 level = 0; level < params.emulated_levels; ++level) {
260 glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
261 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
262 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
263 if (is_compressed) {
264 glGetCompressedTextureImage(texture.handle, level,
265 static_cast<GLsizei>(params.GetHostMipmapSize(level)),
266 staging_buffer.data() + mip_offset);
267 } else {
268 glGetTextureImage(texture.handle, level, format, type,
269 static_cast<GLsizei>(params.GetHostMipmapSize(level)),
270 staging_buffer.data() + mip_offset);
271 }
272 }
273}
274
275void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
276 MICROPROFILE_SCOPE(OpenGL_Texture_Upload);
277 SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); });
278 for (u32 level = 0; level < params.emulated_levels; ++level) {
279 UploadTextureMipmap(level, staging_buffer);
280 }
281}
282
283void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
284 glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
285 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
286
287 auto compression_type = params.GetCompressionType();
288
289 const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
290 ? params.GetConvertedMipmapOffset(level)
291 : params.GetHostMipmapLevelOffset(level);
292 const u8* buffer{staging_buffer.data() + mip_offset};
293 if (is_compressed) {
294 const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
295 switch (params.target) {
296 case SurfaceTarget::Texture2D:
297 glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
298 static_cast<GLsizei>(params.GetMipWidth(level)),
299 static_cast<GLsizei>(params.GetMipHeight(level)),
300 internal_format, image_size, buffer);
301 break;
302 case SurfaceTarget::Texture3D:
303 case SurfaceTarget::Texture2DArray:
304 case SurfaceTarget::TextureCubeArray:
305 glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0,
306 static_cast<GLsizei>(params.GetMipWidth(level)),
307 static_cast<GLsizei>(params.GetMipHeight(level)),
308 static_cast<GLsizei>(params.GetMipDepth(level)),
309 internal_format, image_size, buffer);
310 break;
311 case SurfaceTarget::TextureCubemap: {
312 const std::size_t layer_size{params.GetHostLayerSize(level)};
313 for (std::size_t face = 0; face < params.depth; ++face) {
314 glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
315 static_cast<GLsizei>(params.GetMipWidth(level)),
316 static_cast<GLsizei>(params.GetMipHeight(level)), 1,
317 internal_format, static_cast<GLsizei>(layer_size),
318 buffer);
319 buffer += layer_size;
320 }
321 break;
322 }
323 default:
324 UNREACHABLE();
325 }
326 } else {
327 switch (params.target) {
328 case SurfaceTarget::Texture1D:
329 glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
330 buffer);
331 break;
332 case SurfaceTarget::TextureBuffer:
333 ASSERT(level == 0);
334 glNamedBufferSubData(texture_buffer.handle, 0,
335 params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
336 break;
337 case SurfaceTarget::Texture1DArray:
338 case SurfaceTarget::Texture2D:
339 glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
340 params.GetMipHeight(level), format, type, buffer);
341 break;
342 case SurfaceTarget::Texture3D:
343 case SurfaceTarget::Texture2DArray:
344 case SurfaceTarget::TextureCubeArray:
345 glTextureSubImage3D(
346 texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
347 static_cast<GLsizei>(params.GetMipHeight(level)),
348 static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
349 break;
350 case SurfaceTarget::TextureCubemap:
351 for (std::size_t face = 0; face < params.depth; ++face) {
352 glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
353 params.GetMipWidth(level), params.GetMipHeight(level), 1,
354 format, type, buffer);
355 buffer += params.GetHostLayerSize(level);
356 }
357 break;
358 default:
359 UNREACHABLE();
360 }
361 }
362}
363
364void CachedSurface::DecorateSurfaceName() {
365 LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
366}
367
368void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
369 LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix);
370}
371
372View CachedSurface::CreateView(const ViewParams& view_key) {
373 return CreateViewInner(view_key, false);
374}
375
376View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) {
377 auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy);
378 views[view_key] = view;
379 if (!is_proxy)
380 view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++));
381 return view;
382}
383
384CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
385 const bool is_proxy)
386 : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
387 target = GetTextureTarget(params.target);
388 if (!is_proxy) {
389 texture_view = CreateTextureView();
390 }
391 swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A);
392}
393
394CachedSurfaceView::~CachedSurfaceView() = default;
395
396void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
397 ASSERT(params.num_layers == 1 && params.num_levels == 1);
398
399 const auto& owner_params = surface.GetSurfaceParams();
400
401 switch (owner_params.target) {
402 case SurfaceTarget::Texture1D:
403 glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(),
404 params.base_level);
405 break;
406 case SurfaceTarget::Texture2D:
407 glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(),
408 params.base_level);
409 break;
410 case SurfaceTarget::Texture1DArray:
411 case SurfaceTarget::Texture2DArray:
412 case SurfaceTarget::TextureCubemap:
413 case SurfaceTarget::TextureCubeArray:
414 glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level,
415 params.base_layer);
416 break;
417 default:
418 UNIMPLEMENTED();
419 }
420}
421
422void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source,
423 SwizzleSource z_source, SwizzleSource w_source) {
424 u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
425 if (new_swizzle == swizzle)
426 return;
427 swizzle = new_swizzle;
428 const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
429 GetSwizzleSource(z_source),
430 GetSwizzleSource(w_source)};
431 const GLuint handle = GetTexture();
432 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
433}
434
435OGLTextureView CachedSurfaceView::CreateTextureView() const {
436 const auto& owner_params = surface.GetSurfaceParams();
437 OGLTextureView texture_view;
438 texture_view.Create();
439
440 const GLuint handle{texture_view.handle};
441 const FormatTuple& tuple{
442 GetFormatTuple(owner_params.pixel_format, owner_params.component_type)};
443
444 glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
445 params.num_levels, params.base_layer, params.num_layers);
446
447 ApplyTextureDefaults(owner_params, handle);
448
449 return texture_view;
450}
451
452TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
453 VideoCore::RasterizerInterface& rasterizer,
454 const Device& device)
455 : TextureCacheBase{system, rasterizer} {
456 src_framebuffer.Create();
457 dst_framebuffer.Create();
458}
459
460TextureCacheOpenGL::~TextureCacheOpenGL() = default;
461
462Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
463 return std::make_shared<CachedSurface>(gpu_addr, params);
464}
465
466void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
467 const VideoCommon::CopyParams& copy_params) {
468 const auto& src_params = src_surface->GetSurfaceParams();
469 const auto& dst_params = dst_surface->GetSurfaceParams();
470 if (src_params.type != dst_params.type) {
471 // A fallback is needed
472 return;
473 }
474 const auto src_handle = src_surface->GetTexture();
475 const auto src_target = src_surface->GetTarget();
476 const auto dst_handle = dst_surface->GetTexture();
477 const auto dst_target = dst_surface->GetTarget();
478 glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
479 copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
480 copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
481 copy_params.dest_z, copy_params.width, copy_params.height,
482 copy_params.depth);
483}
484
// Performs a 2D blit between two views using the cache's scratch framebuffers.
// Attaches the views to the read/draw FBOs according to their surface type,
// then issues a single glBlitFramebuffer.
void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
                                   const Tegra::Engines::Fermi2D::Config& copy_config) {
    const auto& src_params{src_view->GetSurfaceParams()};
    const auto& dst_params{dst_view->GetSurfaceParams()};

    // Save the current GL state and restore it (fully dirtied) on every exit path.
    OpenGLState prev_state{OpenGLState::GetCurState()};
    SCOPE_EXIT({
        prev_state.AllDirty();
        prev_state.Apply();
    });

    // Bind our persistent scratch framebuffers as read/draw targets.
    OpenGLState state;
    state.draw.read_framebuffer = src_framebuffer.handle;
    state.draw.draw_framebuffer = dst_framebuffer.handle;
    state.AllDirty();
    state.Apply();

    // Mask of buffer bits to blit; filled in below based on the surface type.
    u32 buffers{};

    // 3D blits are not handled by this framebuffer-based path.
    UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
    UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);

    if (src_params.type == SurfaceType::ColorTexture) {
        // Attach color on both FBOs and explicitly detach any stale depth-stencil
        // attachment left over from a previous blit.
        src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                               0);

        dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                               0);

        buffers = GL_COLOR_BUFFER_BIT;
    } else if (src_params.type == SurfaceType::Depth) {
        // Depth-only: detach color and stencil, attach depth on both FBOs.
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);

        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);

        buffers = GL_DEPTH_BUFFER_BIT;
    } else if (src_params.type == SurfaceType::DepthStencil) {
        // Combined depth-stencil: detach color, attach the packed attachment on both FBOs.
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER);

        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);

        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
    }

    const Common::Rectangle<u32>& src_rect = copy_config.src_rect;
    const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
    const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;

    // Linear filtering is only legal for color blits; depth/stencil must use nearest.
    glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
                      dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
                      is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
}
545
546void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
547 MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
548 const auto& src_params = src_surface->GetSurfaceParams();
549 const auto& dst_params = dst_surface->GetSurfaceParams();
550 UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
551
552 const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
553 const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
554
555 const std::size_t source_size = src_surface->GetHostSizeInBytes();
556 const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
557
558 const std::size_t buffer_size = std::max(source_size, dest_size);
559
560 GLuint copy_pbo_handle = FetchPBO(buffer_size);
561
562 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
563
564 if (source_format.compressed) {
565 glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
566 nullptr);
567 } else {
568 glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
569 static_cast<GLsizei>(source_size), nullptr);
570 }
571 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
572
573 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
574
575 const GLsizei width = static_cast<GLsizei>(dst_params.width);
576 const GLsizei height = static_cast<GLsizei>(dst_params.height);
577 const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
578 if (dest_format.compressed) {
579 LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
580 UNREACHABLE();
581 } else {
582 switch (dst_params.target) {
583 case SurfaceTarget::Texture1D:
584 glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
585 dest_format.type, nullptr);
586 break;
587 case SurfaceTarget::Texture2D:
588 glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
589 dest_format.format, dest_format.type, nullptr);
590 break;
591 case SurfaceTarget::Texture3D:
592 case SurfaceTarget::Texture2DArray:
593 case SurfaceTarget::TextureCubeArray:
594 glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
595 dest_format.format, dest_format.type, nullptr);
596 break;
597 case SurfaceTarget::TextureCubemap:
598 glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
599 dest_format.format, dest_format.type, nullptr);
600 break;
601 default:
602 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
603 static_cast<u32>(dst_params.target));
604 UNREACHABLE();
605 }
606 }
607 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
608
609 glTextureBarrier();
610}
611
612GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
613 ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; });
614 const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size));
615 OGLBuffer& cp = copy_pbo_cache[l2];
616 if (cp.handle == 0) {
617 const std::size_t ceil_size = 1ULL << l2;
618 cp.Create();
619 cp.MakeStreamCopy(ceil_size);
620 }
621 return cp.handle;
622}
623
624} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
new file mode 100644
index 000000000..8e13ab38b
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -0,0 +1,147 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <functional>
9#include <memory>
10#include <unordered_map>
11#include <utility>
12#include <vector>
13
14#include <glad/glad.h>
15
16#include "common/common_types.h"
17#include "video_core/engines/shader_bytecode.h"
18#include "video_core/renderer_opengl/gl_device.h"
19#include "video_core/renderer_opengl/gl_resource_manager.h"
20#include "video_core/texture_cache/texture_cache.h"
21
22namespace OpenGL {
23
24using VideoCommon::SurfaceParams;
25using VideoCommon::ViewParams;
26
27class CachedSurfaceView;
28class CachedSurface;
29class TextureCacheOpenGL;
30
31using Surface = std::shared_ptr<CachedSurface>;
32using View = std::shared_ptr<CachedSurfaceView>;
33using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
34
/// OpenGL-backed surface: owns the GL texture that mirrors a guest GPU surface and
/// handles uploads/downloads between host staging buffers and the GL texture.
class CachedSurface final : public VideoCommon::SurfaceBase<View> {
    friend CachedSurfaceView;

public:
    explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params);
    ~CachedSurface();

    /// Uploads all mipmap levels from the staging buffer into the GL texture.
    void UploadTexture(const std::vector<u8>& staging_buffer) override;
    /// Downloads the GL texture contents into the staging buffer.
    void DownloadTexture(std::vector<u8>& staging_buffer) override;

    /// Returns the GL texture target (e.g. GL_TEXTURE_2D) of this surface.
    GLenum GetTarget() const {
        return target;
    }

    /// Returns the GL name of the owned texture.
    GLuint GetTexture() const {
        return texture.handle;
    }

protected:
    void DecorateSurfaceName() override;

    View CreateView(const ViewParams& view_key) override;
    // is_proxy views reference the surface's own texture instead of a texture view.
    View CreateViewInner(const ViewParams& view_key, bool is_proxy);

private:
    void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);

    // GL format triple describing how texel data is stored/transferred.
    GLenum internal_format{};
    GLenum format{};
    GLenum type{};
    bool is_compressed{};
    // GL texture target matching the surface's SurfaceTarget.
    GLenum target{};
    // Number of views created from this surface (used for view naming/bookkeeping).
    u32 view_count{};

    OGLTexture texture;
    // Backing buffer for buffer-type textures.
    OGLBuffer texture_buffer;
};
72
/// A view into a CachedSurface (a subset of layers/levels, possibly with a different
/// target). A "proxy" view has no GL texture view of its own and resolves to the
/// parent surface's texture directly.
class CachedSurfaceView final : public VideoCommon::ViewBase {
public:
    explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
    ~CachedSurfaceView();

    /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
    void Attach(GLenum attachment, GLenum target) const;

    /// Applies the given component swizzle to the view's texture.
    void ApplySwizzle(Tegra::Texture::SwizzleSource x_source,
                      Tegra::Texture::SwizzleSource y_source,
                      Tegra::Texture::SwizzleSource z_source,
                      Tegra::Texture::SwizzleSource w_source);

    /// Labels the underlying GL object for debugging (prefix distinguishes view kinds).
    void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);

    /// Marks the parent surface as modified at the given cache tick.
    void MarkAsModified(u64 tick) {
        surface.MarkAsModified(true, tick);
    }

    /// Returns the GL texture name backing this view (the parent's texture for proxies).
    GLuint GetTexture() const {
        if (is_proxy) {
            return surface.GetTexture();
        }
        return texture_view.handle;
    }

    const SurfaceParams& GetSurfaceParams() const {
        return surface.GetSurfaceParams();
    }

private:
    // Packs the four swizzle sources into one u32 (x in the high byte, w in the low)
    // so swizzle changes can be compared/stored cheaply.
    u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
                      Tegra::Texture::SwizzleSource y_source,
                      Tegra::Texture::SwizzleSource z_source,
                      Tegra::Texture::SwizzleSource w_source) const {
        return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
               (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
    }

    OGLTextureView CreateTextureView() const;

    // Parent surface; views never outlive their surface.
    CachedSurface& surface;
    GLenum target{};

    // Unused when is_proxy is true.
    OGLTextureView texture_view;
    // Currently applied swizzle, encoded with EncodeSwizzle.
    u32 swizzle{};
    bool is_proxy{};
};
121
/// OpenGL implementation of the generic texture cache: creates GL surfaces and
/// implements GPU-side copies, blits, and PBO-based buffer copies.
class TextureCacheOpenGL final : public TextureCacheBase {
public:
    // NOTE(review): `device` is not stored as a member here — presumably reserved for
    // capability queries; confirm in the constructor definition.
    explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                                const Device& device);
    ~TextureCacheOpenGL();

protected:
    Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;

    /// GPU-side subimage copy via glCopyImageSubData (same surface type only).
    void ImageCopy(Surface& src_surface, Surface& dst_surface,
                   const VideoCommon::CopyParams& copy_params) override;

    /// Framebuffer blit between two views (color, depth or depth-stencil).
    void ImageBlit(View& src_view, View& dst_view,
                   const Tegra::Engines::Fermi2D::Config& copy_config) override;

    /// Format-converting copy that round-trips texel data through a PBO.
    void BufferCopy(Surface& src_surface, Surface& dst_surface) override;

private:
    /// Returns a cached PBO of at least buffer_size bytes (bucketed by log2 size).
    GLuint FetchPBO(std::size_t buffer_size);

    // Scratch framebuffers reused by ImageBlit.
    OGLFramebuffer src_framebuffer;
    OGLFramebuffer dst_framebuffer;
    // PBOs keyed by ceil(log2(size)); see FetchPBO.
    std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
};
146
147} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index aafd6f31b..839178152 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -101,21 +101,19 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
101 101
102RendererOpenGL::~RendererOpenGL() = default; 102RendererOpenGL::~RendererOpenGL() = default;
103 103
104/// Swap buffers (render frame) 104void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
105void RendererOpenGL::SwapBuffers(
106 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
107
108 system.GetPerfStats().EndSystemFrame(); 105 system.GetPerfStats().EndSystemFrame();
109 106
110 // Maintain the rasterizer's state as a priority 107 // Maintain the rasterizer's state as a priority
111 OpenGLState prev_state = OpenGLState::GetCurState(); 108 OpenGLState prev_state = OpenGLState::GetCurState();
109 state.AllDirty();
112 state.Apply(); 110 state.Apply();
113 111
114 if (framebuffer) { 112 if (framebuffer) {
115 // If framebuffer is provided, reload it from memory to a texture 113 // If framebuffer is provided, reload it from memory to a texture
116 if (screen_info.texture.width != (GLsizei)framebuffer->get().width || 114 if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
117 screen_info.texture.height != (GLsizei)framebuffer->get().height || 115 screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
118 screen_info.texture.pixel_format != framebuffer->get().pixel_format) { 116 screen_info.texture.pixel_format != framebuffer->pixel_format) {
119 // Reallocate texture if the framebuffer size has changed. 117 // Reallocate texture if the framebuffer size has changed.
120 // This is expected to not happen very often and hence should not be a 118 // This is expected to not happen very often and hence should not be a
121 // performance problem. 119 // performance problem.
@@ -130,6 +128,8 @@ void RendererOpenGL::SwapBuffers(
130 128
131 DrawScreen(render_window.GetFramebufferLayout()); 129 DrawScreen(render_window.GetFramebufferLayout());
132 130
131 rasterizer->TickFrame();
132
133 render_window.SwapBuffers(); 133 render_window.SwapBuffers();
134 } 134 }
135 135
@@ -139,6 +139,7 @@ void RendererOpenGL::SwapBuffers(
139 system.GetPerfStats().BeginSystemFrame(); 139 system.GetPerfStats().BeginSystemFrame();
140 140
141 // Restore the rasterizer state 141 // Restore the rasterizer state
142 prev_state.AllDirty();
142 prev_state.Apply(); 143 prev_state.Apply();
143} 144}
144 145
@@ -146,43 +147,43 @@ void RendererOpenGL::SwapBuffers(
146 * Loads framebuffer from emulated memory into the active OpenGL texture. 147 * Loads framebuffer from emulated memory into the active OpenGL texture.
147 */ 148 */
148void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { 149void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
149 const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)};
150 const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
151 const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
152
153 // Framebuffer orientation handling 150 // Framebuffer orientation handling
154 framebuffer_transform_flags = framebuffer.transform_flags; 151 framebuffer_transform_flags = framebuffer.transform_flags;
155 framebuffer_crop_rect = framebuffer.crop_rect; 152 framebuffer_crop_rect = framebuffer.crop_rect;
156 153
157 // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default 154 const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
158 // only allows rows to have a memory alignement of 4. 155 if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
159 ASSERT(framebuffer.stride % 4 == 0); 156 return;
160 157 }
161 if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
162 // Reset the screen info's display texture to its own permanent texture
163 screen_info.display_texture = screen_info.texture.resource.handle;
164
165 rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
166
167 constexpr u32 linear_bpp = 4;
168 VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
169 framebuffer.width, framebuffer.height, bytes_per_pixel,
170 linear_bpp, Memory::GetPointer(framebuffer_addr),
171 gl_framebuffer_data.data());
172
173 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
174 158
175 // Update existing texture 159 // Reset the screen info's display texture to its own permanent texture
176 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that 160 screen_info.display_texture = screen_info.texture.resource.handle;
177 // they differ from the LCD resolution.
178 // TODO: Applications could theoretically crash yuzu here by specifying too large
179 // framebuffer sizes. We should make sure that this cannot happen.
180 glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
181 framebuffer.height, screen_info.texture.gl_format,
182 screen_info.texture.gl_type, gl_framebuffer_data.data());
183 161
184 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 162 const auto pixel_format{
185 } 163 VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
164 const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
165 const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
166 const auto host_ptr{Memory::GetPointer(framebuffer_addr)};
167 rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
168
169 // TODO(Rodrigo): Read this from HLE
170 constexpr u32 block_height_log2 = 4;
171 VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
172 framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
173 gl_framebuffer_data.data(), host_ptr);
174
175 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
176
177 // Update existing texture
178 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
179 // they differ from the LCD resolution.
180 // TODO: Applications could theoretically crash yuzu here by specifying too large
181 // framebuffer sizes. We should make sure that this cannot happen.
182 glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
183 framebuffer.height, screen_info.texture.gl_format,
184 screen_info.texture.gl_type, gl_framebuffer_data.data());
185
186 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
186} 187}
187 188
188/** 189/**
@@ -205,6 +206,7 @@ void RendererOpenGL::InitOpenGLObjects() {
205 // Link shaders and get variable locations 206 // Link shaders and get variable locations
206 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); 207 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
207 state.draw.shader_program = shader.handle; 208 state.draw.shader_program = shader.handle;
209 state.AllDirty();
208 state.Apply(); 210 state.Apply();
209 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); 211 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
210 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); 212 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
@@ -262,7 +264,6 @@ void RendererOpenGL::CreateRasterizer() {
262 if (rasterizer) { 264 if (rasterizer) {
263 return; 265 return;
264 } 266 }
265 // Initialize sRGB Usage
266 OpenGLState::ClearsRGBUsed(); 267 OpenGLState::ClearsRGBUsed();
267 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); 268 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
268} 269}
@@ -273,22 +274,29 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
273 texture.height = framebuffer.height; 274 texture.height = framebuffer.height;
274 texture.pixel_format = framebuffer.pixel_format; 275 texture.pixel_format = framebuffer.pixel_format;
275 276
277 const auto pixel_format{
278 VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
279 const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
280 gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
281
276 GLint internal_format; 282 GLint internal_format;
277 switch (framebuffer.pixel_format) { 283 switch (framebuffer.pixel_format) {
278 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 284 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
279 internal_format = GL_RGBA8; 285 internal_format = GL_RGBA8;
280 texture.gl_format = GL_RGBA; 286 texture.gl_format = GL_RGBA;
281 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 287 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
282 gl_framebuffer_data.resize(texture.width * texture.height * 4); 288 break;
289 case Tegra::FramebufferConfig::PixelFormat::RGB565:
290 internal_format = GL_RGB565;
291 texture.gl_format = GL_RGB;
292 texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
283 break; 293 break;
284 default: 294 default:
285 internal_format = GL_RGBA8; 295 internal_format = GL_RGBA8;
286 texture.gl_format = GL_RGBA; 296 texture.gl_format = GL_RGBA;
287 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 297 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
288 gl_framebuffer_data.resize(texture.width * texture.height * 4); 298 UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
289 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer pixel format: {}", 299 static_cast<u32>(framebuffer.pixel_format));
290 static_cast<u32>(framebuffer.pixel_format));
291 UNREACHABLE();
292 } 300 }
293 301
294 texture.resource.Release(); 302 texture.resource.Release();
@@ -334,16 +342,18 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
334 ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), 342 ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
335 }}; 343 }};
336 344
337 state.texture_units[0].texture = screen_info.display_texture; 345 state.textures[0] = screen_info.display_texture;
338 // Workaround brigthness problems in SMO by enabling sRGB in the final output 346 // Workaround brigthness problems in SMO by enabling sRGB in the final output
339 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 347 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
340 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); 348 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
349 state.AllDirty();
341 state.Apply(); 350 state.Apply();
342 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); 351 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
343 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 352 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
344 // Restore default state 353 // Restore default state
345 state.framebuffer_srgb.enabled = false; 354 state.framebuffer_srgb.enabled = false;
346 state.texture_units[0].texture = 0; 355 state.textures[0] = 0;
356 state.AllDirty();
347 state.Apply(); 357 state.Apply();
348 // Clear sRGB state for the next frame 358 // Clear sRGB state for the next frame
349 OpenGLState::ClearsRGBUsed(); 359 OpenGLState::ClearsRGBUsed();
@@ -388,6 +398,7 @@ void RendererOpenGL::CaptureScreenshot() {
388 GLuint old_read_fb = state.draw.read_framebuffer; 398 GLuint old_read_fb = state.draw.read_framebuffer;
389 GLuint old_draw_fb = state.draw.draw_framebuffer; 399 GLuint old_draw_fb = state.draw.draw_framebuffer;
390 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; 400 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
401 state.AllDirty();
391 state.Apply(); 402 state.Apply();
392 403
393 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; 404 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@@ -407,6 +418,7 @@ void RendererOpenGL::CaptureScreenshot() {
407 screenshot_framebuffer.Release(); 418 screenshot_framebuffer.Release();
408 state.draw.read_framebuffer = old_read_fb; 419 state.draw.read_framebuffer = old_read_fb;
409 state.draw.draw_framebuffer = old_draw_fb; 420 state.draw.draw_framebuffer = old_draw_fb;
421 state.AllDirty();
410 state.Apply(); 422 state.Apply();
411 glDeleteRenderbuffers(1, &renderbuffer); 423 glDeleteRenderbuffers(1, &renderbuffer);
412 424
@@ -471,7 +483,6 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
471 } 483 }
472} 484}
473 485
474/// Initialize the renderer
475bool RendererOpenGL::Init() { 486bool RendererOpenGL::Init() {
476 Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window}; 487 Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
477 488
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 4aebf2321..9bd086368 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -43,14 +43,13 @@ struct ScreenInfo {
43 TextureInfo texture; 43 TextureInfo texture;
44}; 44};
45 45
46class RendererOpenGL : public VideoCore::RendererBase { 46class RendererOpenGL final : public VideoCore::RendererBase {
47public: 47public:
48 explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); 48 explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
49 ~RendererOpenGL() override; 49 ~RendererOpenGL() override;
50 50
51 /// Swap buffers (render frame) 51 /// Swap buffers (render frame)
52 void SwapBuffers( 52 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
53 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
54 53
55 /// Initialize the renderer 54 /// Initialize the renderer
56 bool Init() override; 55 bool Init() override;
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index f23fc9f9d..c504a2c1a 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -5,35 +5,75 @@
5#include <string> 5#include <string>
6#include <fmt/format.h> 6#include <fmt/format.h>
7#include <glad/glad.h> 7#include <glad/glad.h>
8
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/scope_exit.h"
10#include "video_core/renderer_opengl/utils.h" 12#include "video_core/renderer_opengl/utils.h"
11 13
12namespace OpenGL { 14namespace OpenGL {
13 15
16VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
17
18VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
19
20void VertexArrayPushBuffer::Setup(GLuint vao_) {
21 vao = vao_;
22 index_buffer = nullptr;
23 vertex_buffers.clear();
24}
25
26void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) {
27 index_buffer = buffer;
28}
29
30void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer,
31 GLintptr offset, GLsizei stride) {
32 vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride});
33}
34
35void VertexArrayPushBuffer::Bind() {
36 if (index_buffer) {
37 glVertexArrayElementBuffer(vao, *index_buffer);
38 }
39
40 // TODO(Rodrigo): Find a way to ARB_multi_bind this
41 for (const auto& entry : vertex_buffers) {
42 glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset,
43 entry.stride);
44 }
45}
46
14BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} 47BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
15 48
16BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; 49BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
17 50
18void BindBuffersRangePushBuffer::Setup(GLuint first_) { 51void BindBuffersRangePushBuffer::Setup(GLuint first_) {
19 first = first_; 52 first = first_;
20 buffers.clear(); 53 buffer_pointers.clear();
21 offsets.clear(); 54 offsets.clear();
22 sizes.clear(); 55 sizes.clear();
23} 56}
24 57
25void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { 58void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) {
26 buffers.push_back(buffer); 59 buffer_pointers.push_back(buffer);
27 offsets.push_back(offset); 60 offsets.push_back(offset);
28 sizes.push_back(size); 61 sizes.push_back(size);
29} 62}
30 63
31void BindBuffersRangePushBuffer::Bind() const { 64void BindBuffersRangePushBuffer::Bind() {
32 const std::size_t count{buffers.size()}; 65 // Ensure sizes are valid.
66 const std::size_t count{buffer_pointers.size()};
33 DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); 67 DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
34 if (count == 0) { 68 if (count == 0) {
35 return; 69 return;
36 } 70 }
71
72 // Dereference buffers.
73 buffers.resize(count);
74 std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
75 [](const GLuint* pointer) { return *pointer; });
76
37 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), 77 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
38 sizes.data()); 78 sizes.data());
39} 79}
@@ -63,4 +103,4 @@ void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_vie
63 glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); 103 glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
64} 104}
65 105
66} // namespace OpenGL \ No newline at end of file 106} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index b3e9fc499..6c2b45546 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -11,20 +11,49 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class BindBuffersRangePushBuffer { 14class VertexArrayPushBuffer final {
15public: 15public:
16 BindBuffersRangePushBuffer(GLenum target); 16 explicit VertexArrayPushBuffer();
17 ~VertexArrayPushBuffer();
18
19 void Setup(GLuint vao_);
20
21 void SetIndexBuffer(const GLuint* buffer);
22
23 void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset,
24 GLsizei stride);
25
26 void Bind();
27
28private:
29 struct Entry {
30 GLuint binding_index{};
31 const GLuint* buffer{};
32 GLintptr offset{};
33 GLsizei stride{};
34 };
35
36 GLuint vao{};
37 const GLuint* index_buffer{};
38 std::vector<Entry> vertex_buffers;
39};
40
41class BindBuffersRangePushBuffer final {
42public:
43 explicit BindBuffersRangePushBuffer(GLenum target);
17 ~BindBuffersRangePushBuffer(); 44 ~BindBuffersRangePushBuffer();
18 45
19 void Setup(GLuint first_); 46 void Setup(GLuint first_);
20 47
21 void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); 48 void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size);
22 49
23 void Bind() const; 50 void Bind();
24 51
25private: 52private:
26 GLenum target; 53 GLenum target{};
27 GLuint first; 54 GLuint first{};
55 std::vector<const GLuint*> buffer_pointers;
56
28 std::vector<GLuint> buffers; 57 std::vector<GLuint> buffers;
29 std::vector<GLintptr> offsets; 58 std::vector<GLintptr> offsets;
30 std::vector<GLsizeiptr> sizes; 59 std::vector<GLsizeiptr> sizes;
@@ -32,4 +61,4 @@ private:
32 61
33void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); 62void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
34 63
35} // namespace OpenGL \ No newline at end of file 64} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 02a9f5ecb..d2e9f4031 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -109,8 +109,8 @@ void VKBufferCache::Reserve(std::size_t max_size) {
109 } 109 }
110} 110}
111 111
112VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) { 112void VKBufferCache::Send() {
113 return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base); 113 stream_buffer->Send(buffer_offset - buffer_offset_base);
114} 114}
115 115
116void VKBufferCache::AlignBuffer(std::size_t alignment) { 116void VKBufferCache::AlignBuffer(std::size_t alignment) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3edf460df..49f13bcdc 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -77,7 +77,7 @@ public:
77 void Reserve(std::size_t max_size); 77 void Reserve(std::size_t max_size);
78 78
79 /// Ensures that the set data is sent to the device. 79 /// Ensures that the set data is sent to the device.
80 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx); 80 void Send();
81 81
82 /// Returns the buffer cache handle. 82 /// Returns the buffer cache handle.
83 vk::Buffer GetBuffer() const { 83 vk::Buffer GetBuffer() const {
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
index 771b05c73..1f73b716b 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -4,9 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h" 7#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/sampler_cache.h" 8#include "video_core/sampler_cache.h"
12#include "video_core/textures/texture.h" 9#include "video_core/textures/texture.h"
@@ -21,9 +18,9 @@ public:
21 ~VKSamplerCache(); 18 ~VKSamplerCache();
22 19
23protected: 20protected:
24 UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; 21 UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
25 22
26 vk::Sampler ToSamplerType(const UniqueSampler& sampler) const; 23 vk::Sampler ToSamplerType(const UniqueSampler& sampler) const override;
27 24
28private: 25private:
29 const VKDevice& device; 26 const VKDevice& device;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index f1fea1871..0f8116458 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -19,23 +19,19 @@ VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_man
19 19
20VKScheduler::~VKScheduler() = default; 20VKScheduler::~VKScheduler() = default;
21 21
22VKExecutionContext VKScheduler::GetExecutionContext() const { 22void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
23 return VKExecutionContext(current_fence, current_cmdbuf);
24}
25
26VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
27 SubmitExecution(semaphore); 23 SubmitExecution(semaphore);
28 current_fence->Release(); 24 if (release_fence)
25 current_fence->Release();
29 AllocateNewContext(); 26 AllocateNewContext();
30 return GetExecutionContext();
31} 27}
32 28
33VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) { 29void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
34 SubmitExecution(semaphore); 30 SubmitExecution(semaphore);
35 current_fence->Wait(); 31 current_fence->Wait();
36 current_fence->Release(); 32 if (release_fence)
33 current_fence->Release();
37 AllocateNewContext(); 34 AllocateNewContext();
38 return GetExecutionContext();
39} 35}
40 36
41void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { 37void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index cfaf5376f..0e5b49c7f 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -10,10 +10,43 @@
10namespace Vulkan { 10namespace Vulkan {
11 11
12class VKDevice; 12class VKDevice;
13class VKExecutionContext;
14class VKFence; 13class VKFence;
15class VKResourceManager; 14class VKResourceManager;
16 15
16class VKFenceView {
17public:
18 VKFenceView() = default;
19 VKFenceView(VKFence* const& fence) : fence{fence} {}
20
21 VKFence* operator->() const noexcept {
22 return fence;
23 }
24
25 operator VKFence&() const noexcept {
26 return *fence;
27 }
28
29private:
30 VKFence* const& fence;
31};
32
33class VKCommandBufferView {
34public:
35 VKCommandBufferView() = default;
36 VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
37
38 const vk::CommandBuffer* operator->() const noexcept {
39 return &cmdbuf;
40 }
41
42 operator vk::CommandBuffer() const noexcept {
43 return cmdbuf;
44 }
45
46private:
47 const vk::CommandBuffer& cmdbuf;
48};
49
17/// The scheduler abstracts command buffer and fence management with an interface that's able to do 50/// The scheduler abstracts command buffer and fence management with an interface that's able to do
18/// OpenGL-like operations on Vulkan command buffers. 51/// OpenGL-like operations on Vulkan command buffers.
19class VKScheduler { 52class VKScheduler {
@@ -21,16 +54,21 @@ public:
21 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); 54 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
22 ~VKScheduler(); 55 ~VKScheduler();
23 56
24 /// Gets the current execution context. 57 /// Gets a reference to the current fence.
25 [[nodiscard]] VKExecutionContext GetExecutionContext() const; 58 VKFenceView GetFence() const {
59 return current_fence;
60 }
61
62 /// Gets a reference to the current command buffer.
63 VKCommandBufferView GetCommandBuffer() const {
64 return current_cmdbuf;
65 }
26 66
27 /// Sends the current execution context to the GPU. It invalidates the current execution context 67 /// Sends the current execution context to the GPU.
28 /// and returns a new one. 68 void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
29 VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
30 69
31 /// Sends the current execution context to the GPU and waits for it to complete. It invalidates 70 /// Sends the current execution context to the GPU and waits for it to complete.
32 /// the current execution context and returns a new one. 71 void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
33 VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
34 72
35private: 73private:
36 void SubmitExecution(vk::Semaphore semaphore); 74 void SubmitExecution(vk::Semaphore semaphore);
@@ -44,26 +82,4 @@ private:
44 VKFence* next_fence = nullptr; 82 VKFence* next_fence = nullptr;
45}; 83};
46 84
47class VKExecutionContext {
48 friend class VKScheduler;
49
50public:
51 VKExecutionContext() = default;
52
53 VKFence& GetFence() const {
54 return *fence;
55 }
56
57 vk::CommandBuffer GetCommandBuffer() const {
58 return cmdbuf;
59 }
60
61private:
62 explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
63 : fence{fence}, cmdbuf{cmdbuf} {}
64
65 VKFence* fence{};
66 vk::CommandBuffer cmdbuf;
67};
68
69} // namespace Vulkan 85} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 33ad9764a..7675fc7b3 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -205,10 +205,6 @@ public:
205 } 205 }
206 206
207private: 207private:
208 using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
209 using OperationDecompilersArray =
210 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
211
212 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); 208 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
213 209
214 void AllocateBindings() { 210 void AllocateBindings() {
@@ -430,20 +426,17 @@ private:
430 instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, 426 instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
431 t_in_uint, "instance_index"); 427 t_in_uint, "instance_index");
432 428
433 bool is_point_size_declared = false;
434 bool is_clip_distances_declared = false; 429 bool is_clip_distances_declared = false;
435 for (const auto index : ir.GetOutputAttributes()) { 430 for (const auto index : ir.GetOutputAttributes()) {
436 if (index == Attribute::Index::PointSize) { 431 if (index == Attribute::Index::ClipDistances0123 ||
437 is_point_size_declared = true; 432 index == Attribute::Index::ClipDistances4567) {
438 } else if (index == Attribute::Index::ClipDistances0123 ||
439 index == Attribute::Index::ClipDistances4567) {
440 is_clip_distances_declared = true; 433 is_clip_distances_declared = true;
441 } 434 }
442 } 435 }
443 436
444 std::vector<Id> members; 437 std::vector<Id> members;
445 members.push_back(t_float4); 438 members.push_back(t_float4);
446 if (is_point_size_declared) { 439 if (ir.UsesPointSize()) {
447 members.push_back(t_float); 440 members.push_back(t_float);
448 } 441 }
449 if (is_clip_distances_declared) { 442 if (is_clip_distances_declared) {
@@ -466,7 +459,7 @@ private:
466 459
467 position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); 460 position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true);
468 point_size_index = 461 point_size_index =
469 MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared); 462 MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize());
470 clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", 463 clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances",
471 is_clip_distances_declared); 464 is_clip_distances_declared);
472 465
@@ -712,7 +705,8 @@ private:
712 case Attribute::Index::Position: 705 case Attribute::Index::Position:
713 return AccessElement(t_out_float, per_vertex, position_index, 706 return AccessElement(t_out_float, per_vertex, position_index,
714 abuf->GetElement()); 707 abuf->GetElement());
715 case Attribute::Index::PointSize: 708 case Attribute::Index::LayerViewportPointSize:
709 UNIMPLEMENTED_IF(abuf->GetElement() != 3);
716 return AccessElement(t_out_float, per_vertex, point_size_index); 710 return AccessElement(t_out_float, per_vertex, point_size_index);
717 case Attribute::Index::ClipDistances0123: 711 case Attribute::Index::ClipDistances0123:
718 return AccessElement(t_out_float, per_vertex, clip_distances_index, 712 return AccessElement(t_out_float, per_vertex, clip_distances_index,
@@ -741,6 +735,16 @@ private:
741 return {}; 735 return {};
742 } 736 }
743 737
738 Id FCastHalf0(Operation operation) {
739 UNIMPLEMENTED();
740 return {};
741 }
742
743 Id FCastHalf1(Operation operation) {
744 UNIMPLEMENTED();
745 return {};
746 }
747
744 Id HNegate(Operation operation) { 748 Id HNegate(Operation operation) {
745 UNIMPLEMENTED(); 749 UNIMPLEMENTED();
746 return {}; 750 return {};
@@ -751,6 +755,11 @@ private:
751 return {}; 755 return {};
752 } 756 }
753 757
758 Id HCastFloat(Operation operation) {
759 UNIMPLEMENTED();
760 return {};
761 }
762
754 Id HUnpack(Operation operation) { 763 Id HUnpack(Operation operation) {
755 UNIMPLEMENTED(); 764 UNIMPLEMENTED();
756 return {}; 765 return {};
@@ -806,12 +815,7 @@ private:
806 return {}; 815 return {};
807 } 816 }
808 817
809 Id LogicalAll2(Operation operation) { 818 Id LogicalAnd2(Operation operation) {
810 UNIMPLEMENTED();
811 return {};
812 }
813
814 Id LogicalAny2(Operation operation) {
815 UNIMPLEMENTED(); 819 UNIMPLEMENTED();
816 return {}; 820 return {};
817 } 821 }
@@ -935,6 +939,46 @@ private:
935 return {}; 939 return {};
936 } 940 }
937 941
942 Id ImageStore(Operation operation) {
943 UNIMPLEMENTED();
944 return {};
945 }
946
947 Id AtomicImageAdd(Operation operation) {
948 UNIMPLEMENTED();
949 return {};
950 }
951
952 Id AtomicImageMin(Operation operation) {
953 UNIMPLEMENTED();
954 return {};
955 }
956
957 Id AtomicImageMax(Operation operation) {
958 UNIMPLEMENTED();
959 return {};
960 }
961
962 Id AtomicImageAnd(Operation operation) {
963 UNIMPLEMENTED();
964 return {};
965 }
966
967 Id AtomicImageOr(Operation operation) {
968 UNIMPLEMENTED();
969 return {};
970 }
971
972 Id AtomicImageXor(Operation operation) {
973 UNIMPLEMENTED();
974 return {};
975 }
976
977 Id AtomicImageExchange(Operation operation) {
978 UNIMPLEMENTED();
979 return {};
980 }
981
938 Id Branch(Operation operation) { 982 Id Branch(Operation operation) {
939 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 983 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
940 UNIMPLEMENTED_IF(!target); 984 UNIMPLEMENTED_IF(!target);
@@ -944,6 +988,14 @@ private:
944 return {}; 988 return {};
945 } 989 }
946 990
991 Id BranchIndirect(Operation operation) {
992 const Id op_a = VisitOperand<Type::Uint>(operation, 0);
993
994 Emit(OpStore(jmp_to, op_a));
995 BranchingOp([&]() { Emit(OpBranch(continue_label)); });
996 return {};
997 }
998
947 Id PushFlowStack(Operation operation) { 999 Id PushFlowStack(Operation operation) {
948 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 1000 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
949 ASSERT(target); 1001 ASSERT(target);
@@ -1055,6 +1107,26 @@ private:
1055 return {}; 1107 return {};
1056 } 1108 }
1057 1109
1110 Id BallotThread(Operation) {
1111 UNIMPLEMENTED();
1112 return {};
1113 }
1114
1115 Id VoteAll(Operation) {
1116 UNIMPLEMENTED();
1117 return {};
1118 }
1119
1120 Id VoteAny(Operation) {
1121 UNIMPLEMENTED();
1122 return {};
1123 }
1124
1125 Id VoteEqual(Operation) {
1126 UNIMPLEMENTED();
1127 return {};
1128 }
1129
1058 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, 1130 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
1059 const std::string& name) { 1131 const std::string& name) {
1060 const Id id = OpVariable(type, storage); 1132 const Id id = OpVariable(type, storage);
@@ -1195,7 +1267,7 @@ private:
1195 return {}; 1267 return {};
1196 } 1268 }
1197 1269
1198 static constexpr OperationDecompilersArray operation_decompilers = { 1270 static constexpr std::array operation_decompilers = {
1199 &SPIRVDecompiler::Assign, 1271 &SPIRVDecompiler::Assign,
1200 1272
1201 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, 1273 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
@@ -1208,6 +1280,8 @@ private:
1208 &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, 1280 &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
1209 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, 1281 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
1210 &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, 1282 &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
1283 &SPIRVDecompiler::FCastHalf0,
1284 &SPIRVDecompiler::FCastHalf1,
1211 &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, 1285 &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
1212 &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, 1286 &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
1213 &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, 1287 &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
@@ -1268,6 +1342,7 @@ private:
1268 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, 1342 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
1269 &SPIRVDecompiler::HNegate, 1343 &SPIRVDecompiler::HNegate,
1270 &SPIRVDecompiler::HClamp, 1344 &SPIRVDecompiler::HClamp,
1345 &SPIRVDecompiler::HCastFloat,
1271 &SPIRVDecompiler::HUnpack, 1346 &SPIRVDecompiler::HUnpack,
1272 &SPIRVDecompiler::HMergeF32, 1347 &SPIRVDecompiler::HMergeF32,
1273 &SPIRVDecompiler::HMergeH0, 1348 &SPIRVDecompiler::HMergeH0,
@@ -1280,8 +1355,7 @@ private:
1280 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, 1355 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
1281 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, 1356 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
1282 &SPIRVDecompiler::LogicalPick2, 1357 &SPIRVDecompiler::LogicalPick2,
1283 &SPIRVDecompiler::LogicalAll2, 1358 &SPIRVDecompiler::LogicalAnd2,
1284 &SPIRVDecompiler::LogicalAny2,
1285 1359
1286 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, 1360 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
1287 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, 1361 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
@@ -1326,7 +1400,17 @@ private:
1326 &SPIRVDecompiler::TextureQueryLod, 1400 &SPIRVDecompiler::TextureQueryLod,
1327 &SPIRVDecompiler::TexelFetch, 1401 &SPIRVDecompiler::TexelFetch,
1328 1402
1403 &SPIRVDecompiler::ImageStore,
1404 &SPIRVDecompiler::AtomicImageAdd,
1405 &SPIRVDecompiler::AtomicImageMin,
1406 &SPIRVDecompiler::AtomicImageMax,
1407 &SPIRVDecompiler::AtomicImageAnd,
1408 &SPIRVDecompiler::AtomicImageOr,
1409 &SPIRVDecompiler::AtomicImageXor,
1410 &SPIRVDecompiler::AtomicImageExchange,
1411
1329 &SPIRVDecompiler::Branch, 1412 &SPIRVDecompiler::Branch,
1413 &SPIRVDecompiler::BranchIndirect,
1330 &SPIRVDecompiler::PushFlowStack, 1414 &SPIRVDecompiler::PushFlowStack,
1331 &SPIRVDecompiler::PopFlowStack, 1415 &SPIRVDecompiler::PopFlowStack,
1332 &SPIRVDecompiler::Exit, 1416 &SPIRVDecompiler::Exit,
@@ -1342,7 +1426,13 @@ private:
1342 &SPIRVDecompiler::WorkGroupId<0>, 1426 &SPIRVDecompiler::WorkGroupId<0>,
1343 &SPIRVDecompiler::WorkGroupId<1>, 1427 &SPIRVDecompiler::WorkGroupId<1>,
1344 &SPIRVDecompiler::WorkGroupId<2>, 1428 &SPIRVDecompiler::WorkGroupId<2>,
1429
1430 &SPIRVDecompiler::BallotThread,
1431 &SPIRVDecompiler::VoteAll,
1432 &SPIRVDecompiler::VoteAny,
1433 &SPIRVDecompiler::VoteEqual,
1345 }; 1434 };
1435 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1346 1436
1347 const VKDevice& device; 1437 const VKDevice& device;
1348 const ShaderIR& ir; 1438 const ShaderIR& ir;
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 58ffa42f2..62f1427f5 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -46,12 +46,12 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
46 return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; 46 return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
47} 47}
48 48
49VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { 49void VKStreamBuffer::Send(u64 size) {
50 ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); 50 ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
51 51
52 if (invalidation_mark) { 52 if (invalidation_mark) {
53 // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. 53 // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
54 exctx = scheduler.Flush(); 54 scheduler.Flush();
55 std::for_each(watches.begin(), watches.begin() + *invalidation_mark, 55 std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
56 [&](auto& resource) { resource->Wait(); }); 56 [&](auto& resource) { resource->Wait(); });
57 invalidation_mark = std::nullopt; 57 invalidation_mark = std::nullopt;
@@ -62,11 +62,9 @@ VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
62 ReserveWatches(WATCHES_RESERVE_CHUNK); 62 ReserveWatches(WATCHES_RESERVE_CHUNK);
63 } 63 }
64 // Add a watch for this allocation. 64 // Add a watch for this allocation.
65 watches[used_watches++]->Watch(exctx.GetFence()); 65 watches[used_watches++]->Watch(scheduler.GetFence());
66 66
67 offset += size; 67 offset += size;
68
69 return exctx;
70} 68}
71 69
72void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { 70void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 69d036ccd..842e54162 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -37,7 +37,7 @@ public:
37 std::tuple<u8*, u64, bool> Reserve(u64 size); 37 std::tuple<u8*, u64, bool> Reserve(u64 size);
38 38
39 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. 39 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
40 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size); 40 void Send(u64 size);
41 41
42 vk::Buffer GetBuffer() const { 42 vk::Buffer GetBuffer() const {
43 return *buffer; 43 return *buffer;
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
new file mode 100644
index 000000000..ec3a76690
--- /dev/null
+++ b/src/video_core/shader/control_flow.cpp
@@ -0,0 +1,481 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <list>
6#include <map>
7#include <stack>
8#include <unordered_map>
9#include <unordered_set>
10#include <vector>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "video_core/shader/control_flow.h"
15#include "video_core/shader/shader_ir.h"
16
17namespace VideoCommon::Shader {
18namespace {
19using Tegra::Shader::Instruction;
20using Tegra::Shader::OpCode;
21
22constexpr s32 unassigned_branch = -2;
23
24struct Query {
25 u32 address{};
26 std::stack<u32> ssy_stack{};
27 std::stack<u32> pbk_stack{};
28};
29
30struct BlockStack {
31 BlockStack() = default;
32 explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
33 std::stack<u32> ssy_stack{};
34 std::stack<u32> pbk_stack{};
35};
36
37struct BlockBranchInfo {
38 Condition condition{};
39 s32 address{exit_branch};
40 bool kill{};
41 bool is_sync{};
42 bool is_brk{};
43 bool ignore{};
44};
45
46struct BlockInfo {
47 u32 start{};
48 u32 end{};
49 bool visited{};
50 BlockBranchInfo branch{};
51
52 bool IsInside(const u32 address) const {
53 return start <= address && address <= end;
54 }
55};
56
57struct CFGRebuildState {
58 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
59 const u32 start)
60 : start{start}, program_code{program_code}, program_size{program_size} {}
61
62 u32 start{};
63 std::vector<BlockInfo> block_info{};
64 std::list<u32> inspect_queries{};
65 std::list<Query> queries{};
66 std::unordered_map<u32, u32> registered{};
67 std::unordered_set<u32> labels{};
68 std::map<u32, u32> ssy_labels{};
69 std::map<u32, u32> pbk_labels{};
70 std::unordered_map<u32, BlockStack> stacks{};
71 const ProgramCode& program_code;
72 const std::size_t program_size;
73};
74
75enum class BlockCollision : u32 { None, Found, Inside };
76
77std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
78 const auto& blocks = state.block_info;
79 for (u32 index = 0; index < blocks.size(); index++) {
80 if (blocks[index].start == address) {
81 return {BlockCollision::Found, index};
82 }
83 if (blocks[index].IsInside(address)) {
84 return {BlockCollision::Inside, index};
85 }
86 }
87 return {BlockCollision::None, 0xFFFFFFFF};
88}
89
90struct ParseInfo {
91 BlockBranchInfo branch_info{};
92 u32 end_address{};
93};
94
95BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
96 auto& it = state.block_info.emplace_back();
97 it.start = start;
98 it.end = end;
99 const u32 index = static_cast<u32>(state.block_info.size() - 1);
100 state.registered.insert({start, index});
101 return it;
102}
103
104Pred GetPredicate(u32 index, bool negated) {
105 return static_cast<Pred>(index + (negated ? 8 : 0));
106}
107
108/**
109 * Returns whether the instruction at the specified offset is a 'sched' instruction.
110 * Sched instructions always appear before a sequence of 3 instructions.
111 */
112constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
113 constexpr u32 SchedPeriod = 4;
114 u32 absolute_offset = offset - main_offset;
115
116 return (absolute_offset % SchedPeriod) == 0;
117}
118
119enum class ParseResult : u32 {
120 ControlCaught,
121 BlockEnd,
122 AbnormalFlow,
123};
124
125std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
126 u32 offset = static_cast<u32>(address);
127 const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction));
128 ParseInfo parse_info{};
129
130 const auto insert_label = [](CFGRebuildState& state, u32 address) {
131 const auto pair = state.labels.emplace(address);
132 if (pair.second) {
133 state.inspect_queries.push_back(address);
134 }
135 };
136
137 while (true) {
138 if (offset >= end_address) {
139 // ASSERT_OR_EXECUTE can't be used, as it ignores the break
140 ASSERT_MSG(false, "Shader passed the current limit!");
141 parse_info.branch_info.address = exit_branch;
142 parse_info.branch_info.ignore = false;
143 break;
144 }
145 if (state.registered.count(offset) != 0) {
146 parse_info.branch_info.address = offset;
147 parse_info.branch_info.ignore = true;
148 break;
149 }
150 if (IsSchedInstruction(offset, state.start)) {
151 offset++;
152 continue;
153 }
154 const Instruction instr = {state.program_code[offset]};
155 const auto opcode = OpCode::Decode(instr);
156 if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
157 offset++;
158 continue;
159 }
160
161 switch (opcode->get().GetId()) {
162 case OpCode::Id::EXIT: {
163 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
164 parse_info.branch_info.condition.predicate =
165 GetPredicate(pred_index, instr.negate_pred != 0);
166 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
167 offset++;
168 continue;
169 }
170 const ConditionCode cc = instr.flow_condition_code;
171 parse_info.branch_info.condition.cc = cc;
172 if (cc == ConditionCode::F) {
173 offset++;
174 continue;
175 }
176 parse_info.branch_info.address = exit_branch;
177 parse_info.branch_info.kill = false;
178 parse_info.branch_info.is_sync = false;
179 parse_info.branch_info.is_brk = false;
180 parse_info.branch_info.ignore = false;
181 parse_info.end_address = offset;
182
183 return {ParseResult::ControlCaught, parse_info};
184 }
185 case OpCode::Id::BRA: {
186 if (instr.bra.constant_buffer != 0) {
187 return {ParseResult::AbnormalFlow, parse_info};
188 }
189 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
190 parse_info.branch_info.condition.predicate =
191 GetPredicate(pred_index, instr.negate_pred != 0);
192 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
193 offset++;
194 continue;
195 }
196 const ConditionCode cc = instr.flow_condition_code;
197 parse_info.branch_info.condition.cc = cc;
198 if (cc == ConditionCode::F) {
199 offset++;
200 continue;
201 }
202 const u32 branch_offset = offset + instr.bra.GetBranchTarget();
203 if (branch_offset == 0) {
204 parse_info.branch_info.address = exit_branch;
205 } else {
206 parse_info.branch_info.address = branch_offset;
207 }
208 insert_label(state, branch_offset);
209 parse_info.branch_info.kill = false;
210 parse_info.branch_info.is_sync = false;
211 parse_info.branch_info.is_brk = false;
212 parse_info.branch_info.ignore = false;
213 parse_info.end_address = offset;
214
215 return {ParseResult::ControlCaught, parse_info};
216 }
217 case OpCode::Id::SYNC: {
218 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
219 parse_info.branch_info.condition.predicate =
220 GetPredicate(pred_index, instr.negate_pred != 0);
221 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
222 offset++;
223 continue;
224 }
225 const ConditionCode cc = instr.flow_condition_code;
226 parse_info.branch_info.condition.cc = cc;
227 if (cc == ConditionCode::F) {
228 offset++;
229 continue;
230 }
231 parse_info.branch_info.address = unassigned_branch;
232 parse_info.branch_info.kill = false;
233 parse_info.branch_info.is_sync = true;
234 parse_info.branch_info.is_brk = false;
235 parse_info.branch_info.ignore = false;
236 parse_info.end_address = offset;
237
238 return {ParseResult::ControlCaught, parse_info};
239 }
240 case OpCode::Id::BRK: {
241 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
242 parse_info.branch_info.condition.predicate =
243 GetPredicate(pred_index, instr.negate_pred != 0);
244 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
245 offset++;
246 continue;
247 }
248 const ConditionCode cc = instr.flow_condition_code;
249 parse_info.branch_info.condition.cc = cc;
250 if (cc == ConditionCode::F) {
251 offset++;
252 continue;
253 }
254 parse_info.branch_info.address = unassigned_branch;
255 parse_info.branch_info.kill = false;
256 parse_info.branch_info.is_sync = false;
257 parse_info.branch_info.is_brk = true;
258 parse_info.branch_info.ignore = false;
259 parse_info.end_address = offset;
260
261 return {ParseResult::ControlCaught, parse_info};
262 }
263 case OpCode::Id::KIL: {
264 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
265 parse_info.branch_info.condition.predicate =
266 GetPredicate(pred_index, instr.negate_pred != 0);
267 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
268 offset++;
269 continue;
270 }
271 const ConditionCode cc = instr.flow_condition_code;
272 parse_info.branch_info.condition.cc = cc;
273 if (cc == ConditionCode::F) {
274 offset++;
275 continue;
276 }
277 parse_info.branch_info.address = exit_branch;
278 parse_info.branch_info.kill = true;
279 parse_info.branch_info.is_sync = false;
280 parse_info.branch_info.is_brk = false;
281 parse_info.branch_info.ignore = false;
282 parse_info.end_address = offset;
283
284 return {ParseResult::ControlCaught, parse_info};
285 }
286 case OpCode::Id::SSY: {
287 const u32 target = offset + instr.bra.GetBranchTarget();
288 insert_label(state, target);
289 state.ssy_labels.emplace(offset, target);
290 break;
291 }
292 case OpCode::Id::PBK: {
293 const u32 target = offset + instr.bra.GetBranchTarget();
294 insert_label(state, target);
295 state.pbk_labels.emplace(offset, target);
296 break;
297 }
298 case OpCode::Id::BRX: {
299 return {ParseResult::AbnormalFlow, parse_info};
300 }
301 default:
302 break;
303 }
304
305 offset++;
306 }
307 parse_info.branch_info.kill = false;
308 parse_info.branch_info.is_sync = false;
309 parse_info.branch_info.is_brk = false;
310 parse_info.end_address = offset - 1;
311 return {ParseResult::BlockEnd, parse_info};
312}
313
314bool TryInspectAddress(CFGRebuildState& state) {
315 if (state.inspect_queries.empty()) {
316 return false;
317 }
318
319 const u32 address = state.inspect_queries.front();
320 state.inspect_queries.pop_front();
321 const auto [result, block_index] = TryGetBlock(state, address);
322 switch (result) {
323 case BlockCollision::Found: {
324 return true;
325 }
326 case BlockCollision::Inside: {
327 // This case is the tricky one:
328 // We need to Split the block in 2 sepparate blocks
329 const u32 end = state.block_info[block_index].end;
330 BlockInfo& new_block = CreateBlockInfo(state, address, end);
331 BlockInfo& current_block = state.block_info[block_index];
332 current_block.end = address - 1;
333 new_block.branch = current_block.branch;
334 BlockBranchInfo forward_branch{};
335 forward_branch.address = address;
336 forward_branch.ignore = true;
337 current_block.branch = forward_branch;
338 return true;
339 }
340 default:
341 break;
342 }
343 const auto [parse_result, parse_info] = ParseCode(state, address);
344 if (parse_result == ParseResult::AbnormalFlow) {
345 // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction
346 return false;
347 }
348
349 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
350 block_info.branch = parse_info.branch_info;
351 if (parse_info.branch_info.condition.IsUnconditional()) {
352 return true;
353 }
354
355 const u32 fallthrough_address = parse_info.end_address + 1;
356 state.inspect_queries.push_front(fallthrough_address);
357 return true;
358}
359
360bool TryQuery(CFGRebuildState& state) {
361 const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
362 BlockInfo& block) {
363 auto gather_start = labels.lower_bound(block.start);
364 const auto gather_end = labels.upper_bound(block.end);
365 while (gather_start != gather_end) {
366 cc.push(gather_start->second);
367 ++gather_start;
368 }
369 };
370 if (state.queries.empty()) {
371 return false;
372 }
373
374 Query& q = state.queries.front();
375 const u32 block_index = state.registered[q.address];
376 BlockInfo& block = state.block_info[block_index];
377 // If the block is visited, check if the stacks match, else gather the ssy/pbk
378 // labels into the current stack and look if the branch at the end of the block
379 // consumes a label. Schedule new queries accordingly
380 if (block.visited) {
381 BlockStack& stack = state.stacks[q.address];
382 const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
383 (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
384 state.queries.pop_front();
385 return all_okay;
386 }
387 block.visited = true;
388 state.stacks.insert_or_assign(q.address, BlockStack{q});
389
390 Query q2(q);
391 state.queries.pop_front();
392 gather_labels(q2.ssy_stack, state.ssy_labels, block);
393 gather_labels(q2.pbk_stack, state.pbk_labels, block);
394 if (!block.branch.condition.IsUnconditional()) {
395 q2.address = block.end + 1;
396 state.queries.push_back(q2);
397 }
398
399 Query conditional_query{q2};
400 if (block.branch.is_sync) {
401 if (block.branch.address == unassigned_branch) {
402 block.branch.address = conditional_query.ssy_stack.top();
403 }
404 conditional_query.ssy_stack.pop();
405 }
406 if (block.branch.is_brk) {
407 if (block.branch.address == unassigned_branch) {
408 block.branch.address = conditional_query.pbk_stack.top();
409 }
410 conditional_query.pbk_stack.pop();
411 }
412 conditional_query.address = block.branch.address;
413 state.queries.push_back(std::move(conditional_query));
414 return true;
415}
416} // Anonymous namespace
417
418std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
419 std::size_t program_size, u32 start_address) {
420 CFGRebuildState state{program_code, program_size, start_address};
421
422 // Inspect Code and generate blocks
423 state.labels.clear();
424 state.labels.emplace(start_address);
425 state.inspect_queries.push_back(state.start);
426 while (!state.inspect_queries.empty()) {
427 if (!TryInspectAddress(state)) {
428 return {};
429 }
430 }
431
432 // Decompile Stacks
433 state.queries.push_back(Query{state.start, {}, {}});
434 bool decompiled = true;
435 while (!state.queries.empty()) {
436 if (!TryQuery(state)) {
437 decompiled = false;
438 break;
439 }
440 }
441
442 // Sort and organize results
443 std::sort(state.block_info.begin(), state.block_info.end(),
444 [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; });
445 ShaderCharacteristics result_out{};
446 result_out.decompilable = decompiled;
447 result_out.start = start_address;
448 result_out.end = start_address;
449 for (const auto& block : state.block_info) {
450 ShaderBlock new_block{};
451 new_block.start = block.start;
452 new_block.end = block.end;
453 new_block.ignore_branch = block.branch.ignore;
454 if (!new_block.ignore_branch) {
455 new_block.branch.cond = block.branch.condition;
456 new_block.branch.kills = block.branch.kill;
457 new_block.branch.address = block.branch.address;
458 }
459 result_out.end = std::max(result_out.end, block.end);
460 result_out.blocks.push_back(new_block);
461 }
462 if (result_out.decompilable) {
463 result_out.labels = std::move(state.labels);
464 return {std::move(result_out)};
465 }
466
467 // If it's not decompilable, merge the unlabelled blocks together
468 auto back = result_out.blocks.begin();
469 auto next = std::next(back);
470 while (next != result_out.blocks.end()) {
471 if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
472 back->end = next->end;
473 next = result_out.blocks.erase(next);
474 continue;
475 }
476 back = next;
477 ++next;
478 }
479 return {std::move(result_out)};
480}
481} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
new file mode 100644
index 000000000..b0a5e4f8c
--- /dev/null
+++ b/src/video_core/shader/control_flow.h
@@ -0,0 +1,79 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <list>
8#include <optional>
9#include <unordered_set>
10
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Shader::ConditionCode;
17using Tegra::Shader::Pred;
18
19constexpr s32 exit_branch = -1;
20
21struct Condition {
22 Pred predicate{Pred::UnusedIndex};
23 ConditionCode cc{ConditionCode::T};
24
25 bool IsUnconditional() const {
26 return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
27 }
28
29 bool operator==(const Condition& other) const {
30 return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
31 }
32
33 bool operator!=(const Condition& other) const {
34 return !operator==(other);
35 }
36};
37
38struct ShaderBlock {
39 struct Branch {
40 Condition cond{};
41 bool kills{};
42 s32 address{};
43
44 bool operator==(const Branch& b) const {
45 return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
46 }
47
48 bool operator!=(const Branch& b) const {
49 return !operator==(b);
50 }
51 };
52
53 u32 start{};
54 u32 end{};
55 bool ignore_branch{};
56 Branch branch{};
57
58 bool operator==(const ShaderBlock& sb) const {
59 return std::tie(start, end, ignore_branch, branch) ==
60 std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
61 }
62
63 bool operator!=(const ShaderBlock& sb) const {
64 return !operator==(sb);
65 }
66};
67
68struct ShaderCharacteristics {
69 std::list<ShaderBlock> blocks{};
70 bool decompilable{};
71 u32 start{};
72 u32 end{};
73 std::unordered_set<u32> labels{};
74};
75
76std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
77 std::size_t program_size, u32 start_address);
78
79} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index a0554c97e..47a9fd961 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -11,6 +11,7 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/engines/shader_header.h" 13#include "video_core/engines/shader_header.h"
14#include "video_core/shader/control_flow.h"
14#include "video_core/shader/node_helper.h" 15#include "video_core/shader/node_helper.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
@@ -21,20 +22,6 @@ using Tegra::Shader::OpCode;
21 22
22namespace { 23namespace {
23 24
24/// Merges exit method of two parallel branches.
25constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
26 if (a == ExitMethod::Undetermined) {
27 return b;
28 }
29 if (b == ExitMethod::Undetermined) {
30 return a;
31 }
32 if (a == b) {
33 return a;
34 }
35 return ExitMethod::Conditional;
36}
37
38/** 25/**
39 * Returns whether the instruction at the specified offset is a 'sched' instruction. 26 * Returns whether the instruction at the specified offset is a 'sched' instruction.
40 * Sched instructions always appear before a sequence of 3 instructions. 27 * Sched instructions always appear before a sequence of 3 instructions.
@@ -51,85 +38,104 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
51void ShaderIR::Decode() { 38void ShaderIR::Decode() {
52 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 39 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
53 40
54 std::set<u32> labels; 41 disable_flow_stack = false;
55 const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); 42 const auto info = ScanFlow(program_code, program_size, main_offset);
56 if (exit_method != ExitMethod::AlwaysEnd) { 43 if (info) {
57 UNREACHABLE_MSG("Program does not always end"); 44 const auto& shader_info = *info;
58 } 45 coverage_begin = shader_info.start;
59 46 coverage_end = shader_info.end;
60 if (labels.empty()) { 47 if (shader_info.decompilable) {
61 basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); 48 disable_flow_stack = true;
49 const auto insert_block = [this](NodeBlock& nodes, u32 label) {
50 if (label == static_cast<u32>(exit_branch)) {
51 return;
52 }
53 basic_blocks.insert({label, nodes});
54 };
55 const auto& blocks = shader_info.blocks;
56 NodeBlock current_block;
57 u32 current_label = static_cast<u32>(exit_branch);
58 for (auto& block : blocks) {
59 if (shader_info.labels.count(block.start) != 0) {
60 insert_block(current_block, current_label);
61 current_block.clear();
62 current_label = block.start;
63 }
64 if (!block.ignore_branch) {
65 DecodeRangeInner(current_block, block.start, block.end);
66 InsertControlFlow(current_block, block);
67 } else {
68 DecodeRangeInner(current_block, block.start, block.end + 1);
69 }
70 }
71 insert_block(current_block, current_label);
72 return;
73 }
74 LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");
75 // we can't decompile it, fallback to standard method
76 for (const auto& block : shader_info.blocks) {
77 basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
78 }
62 return; 79 return;
63 } 80 }
81 LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");
82
83 // Now we need to deal with an undecompilable shader. We need to brute force
84 // a shader that captures every position.
85 coverage_begin = main_offset;
86 const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
87 coverage_end = shader_end;
88 for (u32 label = main_offset; label < shader_end; label++) {
89 basic_blocks.insert({label, DecodeRange(label, label + 1)});
90 }
91}
64 92
65 labels.insert(main_offset); 93NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
66 94 NodeBlock basic_block;
67 for (const u32 label : labels) { 95 DecodeRangeInner(basic_block, begin, end);
68 const auto next_it = labels.lower_bound(label + 1); 96 return basic_block;
69 const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; 97}
70 98
71 basic_blocks.insert({label, DecodeRange(label, next_label)}); 99void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
100 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
101 pc = DecodeInstr(bb, pc);
72 } 102 }
73} 103}
74 104
75ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { 105void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
76 const auto [iter, inserted] = 106 const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
77 exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); 107 Node result = n;
78 ExitMethod& exit_method = iter->second; 108 if (cond.cc != ConditionCode::T) {
79 if (!inserted) 109 result = Conditional(GetConditionCode(cond.cc), {result});
80 return exit_method;
81
82 for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
83 coverage_begin = std::min(coverage_begin, offset);
84 coverage_end = std::max(coverage_end, offset + 1);
85
86 const Instruction instr = {program_code[offset]};
87 const auto opcode = OpCode::Decode(instr);
88 if (!opcode)
89 continue;
90 switch (opcode->get().GetId()) {
91 case OpCode::Id::EXIT: {
92 // The EXIT instruction can be predicated, which means that the shader can conditionally
93 // end on this instruction. We have to consider the case where the condition is not met
94 // and check the exit method of that other basic block.
95 using Tegra::Shader::Pred;
96 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
97 return exit_method = ExitMethod::AlwaysEnd;
98 } else {
99 const ExitMethod not_met = Scan(offset + 1, end, labels);
100 return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
101 }
102 } 110 }
103 case OpCode::Id::BRA: { 111 if (cond.predicate != Pred::UnusedIndex) {
104 const u32 target = offset + instr.bra.GetBranchTarget(); 112 u32 pred = static_cast<u32>(cond.predicate);
105 labels.insert(target); 113 const bool is_neg = pred > 7;
106 const ExitMethod no_jmp = Scan(offset + 1, end, labels); 114 if (is_neg) {
107 const ExitMethod jmp = Scan(target, end, labels); 115 pred -= 8;
108 return exit_method = ParallelExit(no_jmp, jmp); 116 }
109 } 117 result = Conditional(GetPredicate(pred, is_neg), {result});
110 case OpCode::Id::SSY:
111 case OpCode::Id::PBK: {
112 // The SSY and PBK use a similar encoding as the BRA instruction.
113 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
114 "Constant buffer branching is not supported");
115 const u32 target = offset + instr.bra.GetBranchTarget();
116 labels.insert(target);
117 // Continue scanning for an exit method.
118 break;
119 } 118 }
120 default: 119 return result;
121 break; 120 };
121 if (block.branch.address < 0) {
122 if (block.branch.kills) {
123 Node n = Operation(OperationCode::Discard);
124 n = apply_conditions(block.branch.cond, n);
125 bb.push_back(n);
126 global_code.push_back(n);
127 return;
122 } 128 }
129 Node n = Operation(OperationCode::Exit);
130 n = apply_conditions(block.branch.cond, n);
131 bb.push_back(n);
132 global_code.push_back(n);
133 return;
123 } 134 }
124 return exit_method = ExitMethod::AlwaysReturn; 135 Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
125} 136 n = apply_conditions(block.branch.cond, n);
126 137 bb.push_back(n);
127NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { 138 global_code.push_back(n);
128 NodeBlock basic_block;
129 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
130 pc = DecodeInstr(basic_block, pc);
131 }
132 return basic_block;
133} 139}
134 140
135u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { 141u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
@@ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
140 146
141 const Instruction instr = {program_code[pc]}; 147 const Instruction instr = {program_code[pc]};
142 const auto opcode = OpCode::Decode(instr); 148 const auto opcode = OpCode::Decode(instr);
149 const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
143 150
144 // Decoding failure 151 // Decoding failure
145 if (!opcode) { 152 if (!opcode) {
146 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); 153 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
154 bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
155 nv_address, instr.value)));
147 return pc + 1; 156 return pc + 1;
148 } 157 }
149 158
150 bb.push_back( 159 bb.push_back(Comment(
151 Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); 160 fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
152 161
153 using Tegra::Shader::Pred; 162 using Tegra::Shader::Pred;
154 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, 163 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
@@ -167,8 +176,10 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
167 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, 176 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
168 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, 177 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
169 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, 178 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
179 {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
170 {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, 180 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
171 {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, 181 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
182 {OpCode::Type::Image, &ShaderIR::DecodeImage},
172 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, 183 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
173 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, 184 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
174 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, 185 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 87d8fecaa..1473c282a 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
42 case OpCode::Id::FMUL_R: 42 case OpCode::Id::FMUL_R:
43 case OpCode::Id::FMUL_IMM: { 43 case OpCode::Id::FMUL_IMM: {
44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. 44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
45 UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", 45 if (instr.fmul.tab5cb8_2 != 0) {
46 instr.fmul.tab5cb8_2.Value()); 46 LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
47 UNIMPLEMENTED_IF_MSG( 47 instr.fmul.tab5cb8_2.Value());
48 instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", 48 }
49 instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default 49 if (instr.fmul.tab5c68_0 != 1) {
50 LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
51 instr.fmul.tab5c68_0.Value());
52 }
50 53
51 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); 54 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
52 55
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 7bcf38f23..6466fc011 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
23 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); 23 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
24 } 24 }
25 } else { 25 } else {
26 UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); 26 if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
27 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
28 }
27 } 29 }
28 30
29 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); 31 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 4221f0c58..32facd6ba 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -14,6 +14,12 @@ using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 14using Tegra::Shader::OpCode;
15using Tegra::Shader::Register; 15using Tegra::Shader::Register;
16 16
17namespace {
18constexpr OperationCode GetFloatSelector(u64 selector) {
19 return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
20}
21} // Anonymous namespace
22
17u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { 23u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 24 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr); 25 const auto opcode = OpCode::Decode(instr);
@@ -22,7 +28,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
22 case OpCode::Id::I2I_R: 28 case OpCode::Id::I2I_R:
23 case OpCode::Id::I2I_C: 29 case OpCode::Id::I2I_C:
24 case OpCode::Id::I2I_IMM: { 30 case OpCode::Id::I2I_IMM: {
25 UNIMPLEMENTED_IF(instr.conversion.selector); 31 UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
26 UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); 32 UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
27 UNIMPLEMENTED_IF(instr.alu.saturate_d); 33 UNIMPLEMENTED_IF(instr.alu.saturate_d);
28 34
@@ -57,8 +63,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
57 case OpCode::Id::I2F_R: 63 case OpCode::Id::I2F_R:
58 case OpCode::Id::I2F_C: 64 case OpCode::Id::I2F_C:
59 case OpCode::Id::I2F_IMM: { 65 case OpCode::Id::I2F_IMM: {
60 UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); 66 UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
61 UNIMPLEMENTED_IF(instr.conversion.selector); 67 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
62 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 68 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
63 "Condition codes generation in I2F is not implemented"); 69 "Condition codes generation in I2F is not implemented");
64 70
@@ -82,14 +88,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
82 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); 88 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
83 89
84 SetInternalFlagsFromFloat(bb, value, instr.generates_cc); 90 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
91
92 if (instr.conversion.dst_size == Register::Size::Short) {
93 value = Operation(OperationCode::HCastFloat, PRECISE, value);
94 }
95
85 SetRegister(bb, instr.gpr0, value); 96 SetRegister(bb, instr.gpr0, value);
86 break; 97 break;
87 } 98 }
88 case OpCode::Id::F2F_R: 99 case OpCode::Id::F2F_R:
89 case OpCode::Id::F2F_C: 100 case OpCode::Id::F2F_C:
90 case OpCode::Id::F2F_IMM: { 101 case OpCode::Id::F2F_IMM: {
91 UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); 102 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
92 UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); 103 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
93 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 104 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
94 "Condition codes generation in F2F is not implemented"); 105 "Condition codes generation in F2F is not implemented");
95 106
@@ -107,6 +118,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
107 } 118 }
108 }(); 119 }();
109 120
121 if (instr.conversion.src_size == Register::Size::Short) {
122 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
123 std::move(value));
124 } else {
125 ASSERT(instr.conversion.float_src.selector == 0);
126 }
127
110 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); 128 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
111 129
112 value = [&]() { 130 value = [&]() {
@@ -124,19 +142,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
124 default: 142 default:
125 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", 143 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
126 static_cast<u32>(instr.conversion.f2f.rounding.Value())); 144 static_cast<u32>(instr.conversion.f2f.rounding.Value()));
127 return Immediate(0); 145 return value;
128 } 146 }
129 }(); 147 }();
130 value = GetSaturatedFloat(value, instr.alu.saturate_d); 148 value = GetSaturatedFloat(value, instr.alu.saturate_d);
131 149
132 SetInternalFlagsFromFloat(bb, value, instr.generates_cc); 150 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
151
152 if (instr.conversion.dst_size == Register::Size::Short) {
153 value = Operation(OperationCode::HCastFloat, PRECISE, value);
154 }
155
133 SetRegister(bb, instr.gpr0, value); 156 SetRegister(bb, instr.gpr0, value);
134 break; 157 break;
135 } 158 }
136 case OpCode::Id::F2I_R: 159 case OpCode::Id::F2I_R:
137 case OpCode::Id::F2I_C: 160 case OpCode::Id::F2I_C:
138 case OpCode::Id::F2I_IMM: { 161 case OpCode::Id::F2I_IMM: {
139 UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); 162 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
140 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 163 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
141 "Condition codes generation in F2I is not implemented"); 164 "Condition codes generation in F2I is not implemented");
142 Node value = [&]() { 165 Node value = [&]() {
@@ -153,6 +176,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
153 } 176 }
154 }(); 177 }();
155 178
179 if (instr.conversion.src_size == Register::Size::Short) {
180 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
181 std::move(value));
182 } else {
183 ASSERT(instr.conversion.float_src.selector == 0);
184 }
185
156 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); 186 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
157 187
158 value = [&]() { 188 value = [&]() {
diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp
deleted file mode 100644
index e69de29bb..000000000
--- a/src/video_core/shader/decode/decode_integer_set.cpp
+++ /dev/null
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index 29be25ca3..ca2f39e8d 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); 20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
21 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", 21 if (instr.ffma.tab5980_0 != 1) {
22 instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO 22 LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
23 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", 23 }
24 instr.ffma.tab5980_1.Value()); 24 if (instr.ffma.tab5980_1 != 0) {
25 LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
26 }
25 27
26 const Node op_a = GetRegister(instr.gpr8); 28 const Node op_a = GetRegister(instr.gpr8);
27 29
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index f5013e44a..5614e8a0d 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -15,7 +15,6 @@ using Tegra::Shader::OpCode;
15 15
16u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { 16u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19 18
20 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, 19 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
21 instr.fset.neg_a != 0); 20 instr.fset.neg_a != 0);
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index 2323052b0..200c2c983 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -16,10 +16,9 @@ using Tegra::Shader::Pred;
16 16
17u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { 17u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20 19
21 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, 20 Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
22 instr.fsetp.neg_a != 0); 21 instr.fsetp.neg_a != 0);
23 Node op_b = [&]() { 22 Node op_b = [&]() {
24 if (instr.is_b_imm) { 23 if (instr.is_b_imm) {
25 return GetImmediate19(instr); 24 return GetImmediate19(instr);
@@ -29,12 +28,13 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
29 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); 28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
30 } 29 }
31 }(); 30 }();
32 op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); 31 op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
33 32
34 // We can't use the constant predicate as destination. 33 // We can't use the constant predicate as destination.
35 ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); 34 ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
36 35
37 const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b); 36 const Node predicate =
37 GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
38 const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); 38 const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
39 39
40 const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); 40 const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index d59d15bd8..840694527 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -18,43 +18,55 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr); 19 const auto opcode = OpCode::Decode(instr);
20 20
21 UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); 21 DEBUG_ASSERT(instr.hsetp2.ftz == 0);
22 22
23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); 23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); 24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
25 25
26 Node op_b = [&]() { 26 Tegra::Shader::PredCondition cond{};
27 switch (opcode->get().GetId()) { 27 bool h_and{};
28 case OpCode::Id::HSETP2_R: 28 Node op_b{};
29 return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, 29 switch (opcode->get().GetId()) {
30 instr.hsetp2.negate_b); 30 case OpCode::Id::HSETP2_C:
31 default: 31 cond = instr.hsetp2.cbuf_and_imm.cond;
32 UNREACHABLE(); 32 h_and = instr.hsetp2.cbuf_and_imm.h_and;
33 return Immediate(0); 33 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
34 } 34 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
35 }(); 35 break;
36 op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); 36 case OpCode::Id::HSETP2_IMM:
37 37 cond = instr.hsetp2.cbuf_and_imm.cond;
38 // We can't use the constant predicate as destination. 38 h_and = instr.hsetp2.cbuf_and_imm.h_and;
39 ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); 39 op_b = UnpackHalfImmediate(instr, true);
40 40 break;
41 const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); 41 case OpCode::Id::HSETP2_R:
42 cond = instr.hsetp2.reg.cond;
43 h_and = instr.hsetp2.reg.h_and;
44 op_b =
45 GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b),
46 instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b);
47 break;
48 default:
49 UNREACHABLE();
50 op_b = Immediate(0);
51 }
42 52
43 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); 53 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
44 const OperationCode pair_combiner = 54 const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
45 instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
46
47 const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
48 const Node first_pred = Operation(pair_combiner, comparison);
49 55
50 // Set the primary predicate to the result of Predicate OP SecondPredicate 56 const auto Write = [&](u64 dest, Node src) {
51 const Node value = Operation(combiner, first_pred, second_pred); 57 SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
52 SetPredicate(bb, instr.hsetp2.pred3, value); 58 };
53 59
54 if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 60 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
55 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled 61 const u64 first = instr.hsetp2.pred3;
56 const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); 62 const u64 second = instr.hsetp2.pred0;
57 SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); 63 if (h_and) {
64 Node joined = Operation(OperationCode::LogicalAnd2, comparison);
65 Write(first, joined);
66 Write(second, Operation(OperationCode::LogicalNegate, std::move(joined)));
67 } else {
68 Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U)));
69 Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U)));
58 } 70 }
59 71
60 return pc; 72 return pc;
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index c3bcf1ae9..5b44cb79c 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
22 const auto opcode = OpCode::Decode(instr); 22 const auto opcode = OpCode::Decode(instr);
23 23
24 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { 24 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
25 UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); 25 DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
26 } else { 26 } else {
27 UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); 27 DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
28 } 28 }
29 29
30 constexpr auto identity = HalfType::H0_H1; 30 constexpr auto identity = HalfType::H0_H1;
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
new file mode 100644
index 000000000..d54fb88c9
--- /dev/null
+++ b/src/video_core/shader/decode/image.cpp
@@ -0,0 +1,164 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/shader_ir.h"
16
17namespace VideoCommon::Shader {
18
19using Tegra::Shader::Instruction;
20using Tegra::Shader::OpCode;
21
22namespace {
23std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
24 switch (image_type) {
25 case Tegra::Shader::ImageType::Texture1D:
26 case Tegra::Shader::ImageType::TextureBuffer:
27 return 1;
28 case Tegra::Shader::ImageType::Texture1DArray:
29 case Tegra::Shader::ImageType::Texture2D:
30 return 2;
31 case Tegra::Shader::ImageType::Texture2DArray:
32 case Tegra::Shader::ImageType::Texture3D:
33 return 3;
34 }
35 UNREACHABLE();
36 return 1;
37}
38} // Anonymous namespace
39
40u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
41 const Instruction instr = {program_code[pc]};
42 const auto opcode = OpCode::Decode(instr);
43
44 switch (opcode->get().GetId()) {
45 case OpCode::Id::SUST: {
46 UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
47 UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
48 UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store
49
50 std::vector<Node> values;
51 constexpr std::size_t hardcoded_size{4};
52 for (std::size_t i = 0; i < hardcoded_size; ++i) {
53 values.push_back(GetRegister(instr.gpr0.Value() + i));
54 }
55
56 std::vector<Node> coords;
57 const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
58 for (std::size_t i = 0; i < num_coords; ++i) {
59 coords.push_back(GetRegister(instr.gpr8.Value() + i));
60 }
61
62 const auto type{instr.sust.image_type};
63 auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
64 : GetBindlessImage(instr.gpr39, type)};
65 image.MarkWrite();
66
67 MetaImage meta{image, values};
68 bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords)));
69 break;
70 }
71 case OpCode::Id::SUATOM: {
72 UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
73
74 Node value = GetRegister(instr.gpr0);
75
76 std::vector<Node> coords;
77 const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
78 for (std::size_t i = 0; i < num_coords; ++i) {
79 coords.push_back(GetRegister(instr.gpr8.Value() + i));
80 }
81
82 const OperationCode operation_code = [instr] {
83 switch (instr.suatom_d.operation) {
84 case Tegra::Shader::ImageAtomicOperation::Add:
85 return OperationCode::AtomicImageAdd;
86 case Tegra::Shader::ImageAtomicOperation::Min:
87 return OperationCode::AtomicImageMin;
88 case Tegra::Shader::ImageAtomicOperation::Max:
89 return OperationCode::AtomicImageMax;
90 case Tegra::Shader::ImageAtomicOperation::And:
91 return OperationCode::AtomicImageAnd;
92 case Tegra::Shader::ImageAtomicOperation::Or:
93 return OperationCode::AtomicImageOr;
94 case Tegra::Shader::ImageAtomicOperation::Xor:
95 return OperationCode::AtomicImageXor;
96 case Tegra::Shader::ImageAtomicOperation::Exch:
97 return OperationCode::AtomicImageExchange;
98 default:
99 UNIMPLEMENTED_MSG("Unimplemented operation={}",
100 static_cast<u32>(instr.suatom_d.operation.Value()));
101 return OperationCode::AtomicImageAdd;
102 }
103 }();
104
105 const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)};
106 MetaImage meta{image, {std::move(value)}};
107 SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords)));
108 break;
109 }
110 default:
111 UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
112 }
113
114 return pc;
115}
116
117Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type,
118 std::optional<Tegra::Shader::ImageAtomicSize> size) {
119 const auto offset{static_cast<std::size_t>(image.index.Value())};
120 if (const auto image = TryUseExistingImage(offset, type, size)) {
121 return *image;
122 }
123
124 const std::size_t next_index{used_images.size()};
125 return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second;
126}
127
128Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type,
129 std::optional<Tegra::Shader::ImageAtomicSize> size) {
130 const Node image_register{GetRegister(reg)};
131 const auto [base_image, cbuf_index, cbuf_offset]{
132 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
133 const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
134
135 if (const auto image = TryUseExistingImage(cbuf_key, type, size)) {
136 return *image;
137 }
138
139 const std::size_t next_index{used_images.size()};
140 return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size})
141 .first->second;
142}
143
144Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type,
145 std::optional<Tegra::Shader::ImageAtomicSize> size) {
146 auto it = used_images.find(offset);
147 if (it == used_images.end()) {
148 return nullptr;
149 }
150 auto& image = it->second;
151 ASSERT(image.GetType() == type);
152
153 if (size) {
154 // We know the size, if it's known it has to be the same as before, otherwise we can set it.
155 if (image.IsSizeKnown()) {
156 ASSERT(image.GetSize() == size);
157 } else {
158 image.SetSize(*size);
159 }
160 }
161 return &image;
162}
163
164} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index 46e3d5905..59809bcd8 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -14,7 +14,6 @@ using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { 15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18 17
19 const Node op_a = GetRegister(instr.gpr8); 18 const Node op_a = GetRegister(instr.gpr8);
20 const Node op_b = [&]() { 19 const Node op_b = [&]() {
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index dd20775d7..25e48fef8 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -16,7 +16,6 @@ using Tegra::Shader::Pred;
16 16
17u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { 17u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20 19
21 const Node op_a = GetRegister(instr.gpr8); 20 const Node op_a = GetRegister(instr.gpr8);
22 21
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 80fc0ccfc..ed108bea8 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
95 const Node op_b = 95 const Node op_b =
96 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); 96 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
97 97
98 SetTemporal(bb, 0, op_a); 98 SetTemporary(bb, 0, op_a);
99 SetTemporal(bb, 1, op_b); 99 SetTemporary(bb, 1, op_b);
100 SetRegister(bb, instr.gpr0, GetTemporal(0)); 100 SetRegister(bb, instr.gpr0, GetTemporary(0));
101 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); 101 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
102 break; 102 break;
103 } 103 }
104 default: 104 default:
@@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
136 } 136 }
137 }(); 137 }();
138 for (u32 i = 0; i < count; ++i) 138 for (u32 i = 0; i < count; ++i)
139 SetTemporal(bb, i, GetLmem(i * 4)); 139 SetTemporary(bb, i, GetLmem(i * 4));
140 for (u32 i = 0; i < count; ++i) 140 for (u32 i = 0; i < count; ++i)
141 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 141 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
142 break; 142 break;
143 } 143 }
144 default: 144 default:
@@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
172 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); 172 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
173 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 173 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
174 174
175 SetTemporal(bb, i, gmem); 175 SetTemporary(bb, i, gmem);
176 } 176 }
177 for (u32 i = 0; i < count; ++i) { 177 for (u32 i = 0; i < count; ++i) {
178 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 178 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
179 } 179 }
180 break; 180 break;
181 } 181 }
@@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
253 TrackAndGetGlobalMemory(bb, instr, true); 253 TrackAndGetGlobalMemory(bb, instr, true);
254 254
255 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} 255 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
256 SetTemporal(bb, 0, real_address_base); 256 SetTemporary(bb, 0, real_address_base);
257 257
258 const u32 count = GetUniformTypeElementsCount(type); 258 const u32 count = GetUniformTypeElementsCount(type);
259 for (u32 i = 0; i < count; ++i) { 259 for (u32 i = 0; i < count; ++i) {
260 SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); 260 SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
261 } 261 }
262 for (u32 i = 0; i < count; ++i) { 262 for (u32 i = 0; i < count; ++i) {
263 const Node it_offset = Immediate(i * 4); 263 const Node it_offset = Immediate(i * 4);
@@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
265 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); 265 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
266 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 266 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
267 267
268 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); 268 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
269 } 269 }
270 break; 270 break;
271 } 271 }
@@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB
297 const auto addr_register{GetRegister(instr.gmem.gpr)}; 297 const auto addr_register{GetRegister(instr.gmem.gpr)};
298 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; 298 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
299 299
300 const Node base_address{ 300 const auto [base_address, index, offset] =
301 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; 301 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
302 const auto cbuf = std::get_if<CbufNode>(&*base_address); 302 ASSERT(base_address != nullptr);
303 ASSERT(cbuf != nullptr);
304 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
305 ASSERT(cbuf_offset_imm != nullptr);
306 const auto cbuf_offset = cbuf_offset_imm->GetValue();
307 303
308 bb.push_back( 304 bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
309 Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
310 305
311 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; 306 const GlobalMemoryBase descriptor{index, offset};
312 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); 307 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
313 auto& usage = entry->second; 308 auto& usage = entry->second;
314 if (is_write) { 309 if (is_write) {
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d46a8ab82..d46e0f823 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -22,6 +22,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
22 const auto opcode = OpCode::Decode(instr); 22 const auto opcode = OpCode::Decode(instr);
23 23
24 switch (opcode->get().GetId()) { 24 switch (opcode->get().GetId()) {
25 case OpCode::Id::NOP: {
26 UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
27 UNIMPLEMENTED_IF(instr.nop.trigger != 0);
28 // With the previous preconditions, this instruction is a no-operation.
29 break;
30 }
25 case OpCode::Id::EXIT: { 31 case OpCode::Id::EXIT: {
26 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 32 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
27 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", 33 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
@@ -68,6 +74,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
68 case SystemVariable::InvocationInfo: 74 case SystemVariable::InvocationInfo:
69 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); 75 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
70 return Immediate(0u); 76 return Immediate(0u);
77 case SystemVariable::Tid: {
78 Node value = Immediate(0);
79 value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9);
80 value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9);
81 value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
82 return value;
83 }
71 case SystemVariable::TidX: 84 case SystemVariable::TidX:
72 return Operation(OperationCode::LocalInvocationIdX); 85 return Operation(OperationCode::LocalInvocationIdX);
73 case SystemVariable::TidY: 86 case SystemVariable::TidY:
@@ -91,11 +104,46 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
91 break; 104 break;
92 } 105 }
93 case OpCode::Id::BRA: { 106 case OpCode::Id::BRA: {
94 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 107 Node branch;
95 "BRA with constant buffers are not implemented"); 108 if (instr.bra.constant_buffer == 0) {
109 const u32 target = pc + instr.bra.GetBranchTarget();
110 branch = Operation(OperationCode::Branch, Immediate(target));
111 } else {
112 const u32 target = pc + 1;
113 const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
114 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
115 PRECISE, op_a, Immediate(3));
116 const Node operand =
117 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
118 branch = Operation(OperationCode::BranchIndirect, operand);
119 }
96 120
97 const u32 target = pc + instr.bra.GetBranchTarget(); 121 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
98 const Node branch = Operation(OperationCode::Branch, Immediate(target)); 122 if (cc != Tegra::Shader::ConditionCode::T) {
123 bb.push_back(Conditional(GetConditionCode(cc), {branch}));
124 } else {
125 bb.push_back(branch);
126 }
127 break;
128 }
129 case OpCode::Id::BRX: {
130 Node operand;
131 if (instr.brx.constant_buffer != 0) {
132 const s32 target = pc + 1;
133 const Node index = GetRegister(instr.gpr8);
134 const Node op_a =
135 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
136 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
137 PRECISE, op_a, Immediate(3));
138 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
139 } else {
140 const s32 target = pc + instr.brx.GetBranchExtend();
141 const Node op_a = GetRegister(instr.gpr8);
142 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
143 PRECISE, op_a, Immediate(3));
144 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
145 }
146 const Node branch = Operation(OperationCode::BranchIndirect, operand);
99 147
100 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 148 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
101 if (cc != Tegra::Shader::ConditionCode::T) { 149 if (cc != Tegra::Shader::ConditionCode::T) {
@@ -109,6 +157,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
109 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 157 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
110 "Constant buffer flow is not supported"); 158 "Constant buffer flow is not supported");
111 159
160 if (disable_flow_stack) {
161 break;
162 }
163
112 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. 164 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
113 const u32 target = pc + instr.bra.GetBranchTarget(); 165 const u32 target = pc + instr.bra.GetBranchTarget();
114 bb.push_back( 166 bb.push_back(
@@ -119,6 +171,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
119 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 171 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
120 "Constant buffer PBK is not supported"); 172 "Constant buffer PBK is not supported");
121 173
174 if (disable_flow_stack) {
175 break;
176 }
177
122 // PBK pushes to a stack the address where BRK will jump to. 178 // PBK pushes to a stack the address where BRK will jump to.
123 const u32 target = pc + instr.bra.GetBranchTarget(); 179 const u32 target = pc + instr.bra.GetBranchTarget();
124 bb.push_back( 180 bb.push_back(
@@ -130,6 +186,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
130 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", 186 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
131 static_cast<u32>(cc)); 187 static_cast<u32>(cc));
132 188
189 if (disable_flow_stack) {
190 break;
191 }
192
133 // The SYNC opcode jumps to the address previously set by the SSY opcode 193 // The SYNC opcode jumps to the address previously set by the SSY opcode
134 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); 194 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
135 break; 195 break;
@@ -138,6 +198,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
138 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 198 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
139 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", 199 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
140 static_cast<u32>(cc)); 200 static_cast<u32>(cc));
201 if (disable_flow_stack) {
202 break;
203 }
141 204
142 // The BRK opcode jumps to the address previously set by the PBK opcode 205 // The BRK opcode jumps to the address previously set by the PBK opcode
143 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); 206 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index febbfeb50..84dbc50fe 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -15,7 +15,6 @@ using Tegra::Shader::OpCode;
15 15
16u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { 16u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19 18
20 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 19 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
21 "Condition codes generation in PSET is not implemented"); 20 "Condition codes generation in PSET is not implemented");
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 2ac16eeb0..f6ee68a54 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -17,8 +17,8 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
20 const Node op_a = GetRegister(instr.gpr8); 20 Node op_a = GetRegister(instr.gpr8);
21 const Node op_b = [&]() { 21 Node op_b = [&]() {
22 if (instr.is_b_imm) { 22 if (instr.is_b_imm) {
23 return Immediate(instr.alu.GetSignedImm20_20()); 23 return Immediate(instr.alu.GetSignedImm20_20());
24 } else if (instr.is_b_gpr) { 24 } else if (instr.is_b_gpr) {
@@ -32,16 +32,23 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
32 case OpCode::Id::SHR_C: 32 case OpCode::Id::SHR_C:
33 case OpCode::Id::SHR_R: 33 case OpCode::Id::SHR_R:
34 case OpCode::Id::SHR_IMM: { 34 case OpCode::Id::SHR_IMM: {
35 const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, 35 if (instr.shr.wrap) {
36 instr.shift.is_signed, PRECISE, op_a, op_b); 36 op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f));
37 } else {
38 op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0));
39 op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31));
40 }
41
42 Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
43 std::move(op_a), std::move(op_b));
37 SetInternalFlagsFromInteger(bb, value, instr.generates_cc); 44 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
38 SetRegister(bb, instr.gpr0, value); 45 SetRegister(bb, instr.gpr0, std::move(value));
39 break; 46 break;
40 } 47 }
41 case OpCode::Id::SHL_C: 48 case OpCode::Id::SHL_C:
42 case OpCode::Id::SHL_R: 49 case OpCode::Id::SHL_R:
43 case OpCode::Id::SHL_IMM: { 50 case OpCode::Id::SHL_IMM: {
44 const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b); 51 const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
45 SetInternalFlagsFromInteger(bb, value, instr.generates_cc); 52 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
46 SetRegister(bb, instr.gpr0, value); 53 SetRegister(bb, instr.gpr0, value);
47 break; 54 break;
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 4a356dbd4..0b934a069 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
181 const Node value = 181 const Node value =
182 Operation(OperationCode::TextureQueryDimensions, meta, 182 Operation(OperationCode::TextureQueryDimensions, meta,
183 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); 183 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
184 SetTemporal(bb, indexer++, value); 184 SetTemporary(bb, indexer++, value);
185 } 185 }
186 for (u32 i = 0; i < indexer; ++i) { 186 for (u32 i = 0; i < indexer; ++i) {
187 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 187 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
188 } 188 }
189 break; 189 break;
190 } 190 }
@@ -238,13 +238,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
238 auto params = coords; 238 auto params = coords;
239 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; 239 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
240 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); 240 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
241 SetTemporal(bb, indexer++, value); 241 SetTemporary(bb, indexer++, value);
242 } 242 }
243 for (u32 i = 0; i < indexer; ++i) { 243 for (u32 i = 0; i < indexer; ++i) {
244 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 244 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
245 } 245 }
246 break; 246 break;
247 } 247 }
248 case OpCode::Id::TLD: {
249 UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
250 UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
251 UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
252
253 if (instr.tld.nodep_flag) {
254 LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
255 }
256
257 WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
258 break;
259 }
248 case OpCode::Id::TLDS: { 260 case OpCode::Id::TLDS: {
249 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; 261 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
250 const bool is_array{instr.tlds.IsArrayTexture()}; 262 const bool is_array{instr.tlds.IsArrayTexture()};
@@ -257,7 +269,13 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
257 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); 269 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
258 } 270 }
259 271
260 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); 272 const Node4 components = GetTldsCode(instr, texture_type, is_array);
273
274 if (instr.tlds.fp32_flag) {
275 WriteTexsInstructionFloat(bb, instr, components);
276 } else {
277 WriteTexsInstructionHalfFloat(bb, instr, components);
278 }
261 break; 279 break;
262 } 280 }
263 default: 281 default:
@@ -290,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
290const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, 308const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
291 bool is_array, bool is_shadow) { 309 bool is_array, bool is_shadow) {
292 const Node sampler_register = GetRegister(reg); 310 const Node sampler_register = GetRegister(reg);
293 const Node base_sampler = 311 const auto [base_sampler, cbuf_index, cbuf_offset] =
294 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); 312 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
295 const auto cbuf = std::get_if<CbufNode>(&*base_sampler); 313 ASSERT(base_sampler != nullptr);
296 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
297 ASSERT(cbuf_offset_imm != nullptr);
298 const auto cbuf_offset = cbuf_offset_imm->GetValue();
299 const auto cbuf_index = cbuf->GetIndex();
300 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); 314 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
301 315
302 // If this sampler has already been used, return the existing mapping. 316 // If this sampler has already been used, return the existing mapping.
@@ -322,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
322 // Skip disabled components 336 // Skip disabled components
323 continue; 337 continue;
324 } 338 }
325 SetTemporal(bb, dest_elem++, components[elem]); 339 SetTemporary(bb, dest_elem++, components[elem]);
326 } 340 }
327 // After writing values in temporals, move them to the real registers 341 // After writing values in temporals, move them to the real registers
328 for (u32 i = 0; i < dest_elem; ++i) { 342 for (u32 i = 0; i < dest_elem; ++i) {
329 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 343 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
330 } 344 }
331} 345}
332 346
@@ -339,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
339 for (u32 component = 0; component < 4; ++component) { 353 for (u32 component = 0; component < 4; ++component) {
340 if (!instr.texs.IsComponentEnabled(component)) 354 if (!instr.texs.IsComponentEnabled(component))
341 continue; 355 continue;
342 SetTemporal(bb, dest_elem++, components[component]); 356 SetTemporary(bb, dest_elem++, components[component]);
343 } 357 }
344 358
345 for (u32 i = 0; i < dest_elem; ++i) { 359 for (u32 i = 0; i < dest_elem; ++i) {
346 if (i < 2) { 360 if (i < 2) {
347 // Write the first two swizzle components to gpr0 and gpr0+1 361 // Write the first two swizzle components to gpr0 and gpr0+1
348 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); 362 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
349 } else { 363 } else {
350 ASSERT(instr.texs.HasTwoDestinations()); 364 ASSERT(instr.texs.HasTwoDestinations());
351 // Write the rest of the swizzle components to gpr28 and gpr28+1 365 // Write the rest of the swizzle components to gpr28 and gpr28+1
352 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); 366 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
353 } 367 }
354 } 368 }
355} 369}
@@ -377,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
377 return; 391 return;
378 } 392 }
379 393
380 SetTemporal(bb, 0, first_value); 394 SetTemporary(bb, 0, first_value);
381 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); 395 SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
382 396
383 SetRegister(bb, instr.gpr0, GetTemporal(0)); 397 SetRegister(bb, instr.gpr0, GetTemporary(0));
384 SetRegister(bb, instr.gpr28, GetTemporal(1)); 398 SetRegister(bb, instr.gpr28, GetTemporary(1));
385} 399}
386 400
387Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, 401Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
@@ -575,6 +589,39 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
575 return values; 589 return values;
576} 590}
577 591
592Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
593 const auto texture_type{instr.tld.texture_type};
594 const bool is_array{instr.tld.is_array};
595 const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
596 const std::size_t coord_count{GetCoordCount(texture_type)};
597
598 u64 gpr8_cursor{instr.gpr8.Value()};
599 const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr};
600
601 std::vector<Node> coords;
602 coords.reserve(coord_count);
603 for (std::size_t i = 0; i < coord_count; ++i) {
604 coords.push_back(GetRegister(gpr8_cursor++));
605 }
606
607 u64 gpr20_cursor{instr.gpr20.Value()};
608 // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
609 const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)};
610 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
611 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
612
613 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
614
615 Node4 values;
616 for (u32 element = 0; element < values.size(); ++element) {
617 auto coords_copy = coords;
618 MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element};
619 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
620 }
621
622 return values;
623}
624
578Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { 625Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
579 const std::size_t type_coord_count = GetCoordCount(texture_type); 626 const std::size_t type_coord_count = GetCoordCount(texture_type);
580 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; 627 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
new file mode 100644
index 000000000..04ca74f46
--- /dev/null
+++ b/src/video_core/shader/decode/warp.cpp
@@ -0,0 +1,55 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16using Tegra::Shader::VoteOperation;
17
18namespace {
19OperationCode GetOperationCode(VoteOperation vote_op) {
20 switch (vote_op) {
21 case VoteOperation::All:
22 return OperationCode::VoteAll;
23 case VoteOperation::Any:
24 return OperationCode::VoteAny;
25 case VoteOperation::Eq:
26 return OperationCode::VoteEqual;
27 default:
28 UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op));
29 return OperationCode::VoteAll;
30 }
31}
32} // Anonymous namespace
33
34u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
35 const Instruction instr = {program_code[pc]};
36 const auto opcode = OpCode::Decode(instr);
37
38 switch (opcode->get().GetId()) {
39 case OpCode::Id::VOTE: {
40 const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
41 const Node active = Operation(OperationCode::BallotThread, value);
42 const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
43 SetRegister(bb, instr.gpr0, active);
44 SetPredicate(bb, instr.vote.dest_pred, vote);
45 break;
46 }
47 default:
48 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
49 break;
50 }
51
52 return pc;
53}
54
55} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 93dee77d1..206961909 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
73 if (is_psl) { 73 if (is_psl) {
74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); 74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
75 } 75 }
76 SetTemporal(bb, 0, product); 76 SetTemporary(bb, 0, product);
77 product = GetTemporal(0); 77 product = GetTemporary(0);
78 78
79 const Node original_c = op_c; 79 const Node original_c = op_c;
80 const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error 80 const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
@@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
98 } 98 }
99 }(); 99 }();
100 100
101 SetTemporal(bb, 1, op_c); 101 SetTemporary(bb, 1, op_c);
102 op_c = GetTemporal(1); 102 op_c = GetTemporary(1);
103 103
104 // TODO(Rodrigo): Use an appropiate sign for this operation 104 // TODO(Rodrigo): Use an appropiate sign for this operation
105 Node sum = Operation(OperationCode::IAdd, product, op_c); 105 Node sum = Operation(OperationCode::IAdd, product, op_c);
106 SetTemporal(bb, 2, sum); 106 SetTemporary(bb, 2, sum);
107 sum = GetTemporal(2); 107 sum = GetTemporary(2);
108 if (is_merge) { 108 if (is_merge) {
109 const Node a = BitfieldExtract(sum, 0, 16); 109 const Node a = BitfieldExtract(sum, 0, 16);
110 const Node b = 110 const Node b =
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 3cfb911bb..b47b201cf 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -7,6 +7,7 @@
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <memory> 9#include <memory>
10#include <optional>
10#include <string> 11#include <string>
11#include <tuple> 12#include <tuple>
12#include <utility> 13#include <utility>
@@ -30,6 +31,8 @@ enum class OperationCode {
30 FNegate, /// (MetaArithmetic, float a) -> float 31 FNegate, /// (MetaArithmetic, float a) -> float
31 FAbsolute, /// (MetaArithmetic, float a) -> float 32 FAbsolute, /// (MetaArithmetic, float a) -> float
32 FClamp, /// (MetaArithmetic, float value, float min, float max) -> float 33 FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
34 FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float
35 FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float
33 FMin, /// (MetaArithmetic, float a, float b) -> float 36 FMin, /// (MetaArithmetic, float a, float b) -> float
34 FMax, /// (MetaArithmetic, float a, float b) -> float 37 FMax, /// (MetaArithmetic, float a, float b) -> float
35 FCos, /// (MetaArithmetic, float a) -> float 38 FCos, /// (MetaArithmetic, float a) -> float
@@ -83,17 +86,18 @@ enum class OperationCode {
83 UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint 86 UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
84 UBitCount, /// (MetaArithmetic, uint) -> uint 87 UBitCount, /// (MetaArithmetic, uint) -> uint
85 88
86 HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 89 HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
87 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 90 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
88 HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 91 HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
89 HAbsolute, /// (f16vec2 a) -> f16vec2 92 HAbsolute, /// (f16vec2 a) -> f16vec2
90 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 93 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
91 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 94 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
92 HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 95 HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
93 HMergeF32, /// (f16vec2 src) -> float 96 HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
94 HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 97 HMergeF32, /// (f16vec2 src) -> float
95 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 98 HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
96 HPack2, /// (float a, float b) -> f16vec2 99 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
100 HPack2, /// (float a, float b) -> f16vec2
97 101
98 LogicalAssign, /// (bool& dst, bool src) -> void 102 LogicalAssign, /// (bool& dst, bool src) -> void
99 LogicalAnd, /// (bool a, bool b) -> bool 103 LogicalAnd, /// (bool a, bool b) -> bool
@@ -101,8 +105,7 @@ enum class OperationCode {
101 LogicalXor, /// (bool a, bool b) -> bool 105 LogicalXor, /// (bool a, bool b) -> bool
102 LogicalNegate, /// (bool a) -> bool 106 LogicalNegate, /// (bool a) -> bool
103 LogicalPick2, /// (bool2 pair, uint index) -> bool 107 LogicalPick2, /// (bool2 pair, uint index) -> bool
104 LogicalAll2, /// (bool2 a) -> bool 108 LogicalAnd2, /// (bool2 a) -> bool
105 LogicalAny2, /// (bool2 a) -> bool
106 109
107 LogicalFLessThan, /// (float a, float b) -> bool 110 LogicalFLessThan, /// (float a, float b) -> bool
108 LogicalFEqual, /// (float a, float b) -> bool 111 LogicalFEqual, /// (float a, float b) -> bool
@@ -146,11 +149,21 @@ enum class OperationCode {
146 TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 149 TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
147 TexelFetch, /// (MetaTexture, int[N], int) -> float4 150 TexelFetch, /// (MetaTexture, int[N], int) -> float4
148 151
149 Branch, /// (uint branch_target) -> void 152 ImageStore, /// (MetaImage, int[N] values) -> void
150 PushFlowStack, /// (uint branch_target) -> void 153 AtomicImageAdd, /// (MetaImage, int[N] coords) -> void
151 PopFlowStack, /// () -> void 154 AtomicImageMin, /// (MetaImage, int[N] coords) -> void
152 Exit, /// () -> void 155 AtomicImageMax, /// (MetaImage, int[N] coords) -> void
153 Discard, /// () -> void 156 AtomicImageAnd, /// (MetaImage, int[N] coords) -> void
157 AtomicImageOr, /// (MetaImage, int[N] coords) -> void
158 AtomicImageXor, /// (MetaImage, int[N] coords) -> void
159 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
160
161 Branch, /// (uint branch_target) -> void
162 BranchIndirect, /// (uint branch_target) -> void
163 PushFlowStack, /// (uint branch_target) -> void
164 PopFlowStack, /// () -> void
165 Exit, /// () -> void
166 Discard, /// () -> void
154 167
155 EmitVertex, /// () -> void 168 EmitVertex, /// () -> void
156 EndPrimitive, /// () -> void 169 EndPrimitive, /// () -> void
@@ -163,6 +176,11 @@ enum class OperationCode {
163 WorkGroupIdY, /// () -> uint 176 WorkGroupIdY, /// () -> uint
164 WorkGroupIdZ, /// () -> uint 177 WorkGroupIdZ, /// () -> uint
165 178
179 BallotThread, /// (bool) -> uint
180 VoteAll, /// (bool) -> bool
181 VoteAny, /// (bool) -> bool
182 VoteEqual, /// (bool) -> bool
183
166 Amount, 184 Amount,
167}; 185};
168 186
@@ -263,6 +281,87 @@ private:
263 bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. 281 bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
264}; 282};
265 283
284class Image final {
285public:
286 constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
287 std::optional<Tegra::Shader::ImageAtomicSize> size)
288 : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {}
289
290 constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
291 Tegra::Shader::ImageType type,
292 std::optional<Tegra::Shader::ImageAtomicSize> size)
293 : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
294 is_bindless{true}, size{size} {}
295
296 constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
297 bool is_bindless, bool is_written, bool is_read,
298 std::optional<Tegra::Shader::ImageAtomicSize> size)
299 : offset{offset}, index{index}, type{type}, is_bindless{is_bindless},
300 is_written{is_written}, is_read{is_read}, size{size} {}
301
302 void MarkWrite() {
303 is_written = true;
304 }
305
306 void MarkRead() {
307 is_read = true;
308 }
309
310 void SetSize(Tegra::Shader::ImageAtomicSize size_) {
311 size = size_;
312 }
313
314 constexpr std::size_t GetOffset() const {
315 return offset;
316 }
317
318 constexpr std::size_t GetIndex() const {
319 return index;
320 }
321
322 constexpr Tegra::Shader::ImageType GetType() const {
323 return type;
324 }
325
326 constexpr bool IsBindless() const {
327 return is_bindless;
328 }
329
330 constexpr bool IsWritten() const {
331 return is_written;
332 }
333
334 constexpr bool IsRead() const {
335 return is_read;
336 }
337
338 constexpr std::pair<u32, u32> GetBindlessCBuf() const {
339 return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
340 }
341
342 constexpr bool IsSizeKnown() const {
343 return size.has_value();
344 }
345
346 constexpr Tegra::Shader::ImageAtomicSize GetSize() const {
347 return size.value();
348 }
349
350 constexpr bool operator<(const Image& rhs) const {
351 return std::tie(offset, index, type, size, is_bindless) <
352 std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless);
353 }
354
355private:
356 u64 offset{};
357 std::size_t index{};
358 Tegra::Shader::ImageType type{};
359 bool is_bindless{};
360 bool is_written{};
361 bool is_read{};
362 std::optional<Tegra::Shader::ImageAtomicSize> size{};
363};
364
266struct GlobalMemoryBase { 365struct GlobalMemoryBase {
267 u32 cbuf_index{}; 366 u32 cbuf_index{};
268 u32 cbuf_offset{}; 367 u32 cbuf_offset{};
@@ -289,8 +388,14 @@ struct MetaTexture {
289 u32 element{}; 388 u32 element{};
290}; 389};
291 390
391struct MetaImage {
392 const Image& image;
393 std::vector<Node> values;
394};
395
292/// Parameters that modify an operation but are not part of any particular operand 396/// Parameters that modify an operation but are not part of any particular operand
293using Meta = std::variant<MetaArithmetic, MetaTexture, MetaStackClass, Tegra::Shader::HalfType>; 397using Meta =
398 std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
294 399
295/// Holds any kind of operation that can be done in the IR 400/// Holds any kind of operation that can be done in the IR
296class OperationNode final { 401class OperationNode final {
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 6fccbbba3..b3dcd291c 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -12,7 +12,7 @@
12namespace VideoCommon::Shader { 12namespace VideoCommon::Shader {
13 13
14Node Conditional(Node condition, std::vector<Node> code) { 14Node Conditional(Node condition, std::vector<Node> code) {
15 return MakeNode<ConditionalNode>(condition, std::move(code)); 15 return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
16} 16}
17 17
18Node Comment(std::string text) { 18Node Comment(std::string text) {
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 11b545cca..1e5c7f660 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -22,8 +22,8 @@ using Tegra::Shader::PredCondition;
22using Tegra::Shader::PredOperation; 22using Tegra::Shader::PredOperation;
23using Tegra::Shader::Register; 23using Tegra::Shader::Register;
24 24
25ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) 25ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size)
26 : program_code{program_code}, main_offset{main_offset} { 26 : program_code{program_code}, main_offset{main_offset}, program_size{size} {
27 Decode(); 27 Decode();
28} 28}
29 29
@@ -61,8 +61,17 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
61 const auto [entry, is_new] = used_cbufs.try_emplace(index); 61 const auto [entry, is_new] = used_cbufs.try_emplace(index);
62 entry->second.MarkAsUsedIndirect(); 62 entry->second.MarkAsUsedIndirect();
63 63
64 const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); 64 Node final_offset = [&] {
65 return MakeNode<CbufNode>(index, final_offset); 65 // Attempt to inline constant buffer without a variable offset. This is done to allow
66 // tracking LDC calls.
67 if (const auto gpr = std::get_if<GprNode>(&*node)) {
68 if (gpr->GetIndex() == Register::ZeroIndex) {
69 return Immediate(offset);
70 }
71 }
72 return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
73 }();
74 return MakeNode<CbufNode>(index, std::move(final_offset));
66} 75}
67 76
68Node ShaderIR::GetPredicate(u64 pred_, bool negated) { 77Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
@@ -80,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) {
80 89
81Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { 90Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
82 used_input_attributes.emplace(index); 91 used_input_attributes.emplace(index);
83 return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); 92 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
84} 93}
85 94
86Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { 95Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
@@ -89,6 +98,22 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres
89} 98}
90 99
91Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { 100Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
101 if (index == Attribute::Index::LayerViewportPointSize) {
102 switch (element) {
103 case 0:
104 UNIMPLEMENTED();
105 break;
106 case 1:
107 uses_layer = true;
108 break;
109 case 2:
110 uses_viewport_index = true;
111 break;
112 case 3:
113 uses_point_size = true;
114 break;
115 }
116 }
92 if (index == Attribute::Index::ClipDistances0123 || 117 if (index == Attribute::Index::ClipDistances0123 ||
93 index == Attribute::Index::ClipDistances4567) { 118 index == Attribute::Index::ClipDistances4567) {
94 const auto clip_index = 119 const auto clip_index =
@@ -97,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
97 } 122 }
98 used_output_attributes.insert(index); 123 used_output_attributes.insert(index);
99 124
100 return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); 125 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
101} 126}
102 127
103Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { 128Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
@@ -109,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
109} 134}
110 135
111Node ShaderIR::GetLocalMemory(Node address) { 136Node ShaderIR::GetLocalMemory(Node address) {
112 return MakeNode<LmemNode>(address); 137 return MakeNode<LmemNode>(std::move(address));
113} 138}
114 139
115Node ShaderIR::GetTemporal(u32 id) { 140Node ShaderIR::GetTemporary(u32 id) {
116 return GetRegister(Register::ZeroIndex + 1 + id); 141 return GetRegister(Register::ZeroIndex + 1 + id);
117} 142}
118 143
119Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { 144Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
120 if (absolute) { 145 if (absolute) {
121 value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); 146 value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
122 } 147 }
123 if (negate) { 148 if (negate) {
124 value = Operation(OperationCode::FNegate, NO_PRECISE, value); 149 value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
125 } 150 }
126 return value; 151 return value;
127} 152}
@@ -130,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
130 if (!saturate) { 155 if (!saturate) {
131 return value; 156 return value;
132 } 157 }
133 const Node positive_zero = Immediate(std::copysignf(0, 1)); 158
134 const Node positive_one = Immediate(1.0f); 159 Node positive_zero = Immediate(std::copysignf(0, 1));
135 return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); 160 Node positive_one = Immediate(1.0f);
161 return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
162 std::move(positive_one));
136} 163}
137 164
138Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { 165Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
139 switch (size) { 166 switch (size) {
140 case Register::Size::Byte: 167 case Register::Size::Byte:
141 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, 168 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
142 Immediate(24)); 169 std::move(value), Immediate(24));
143 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, 170 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
144 Immediate(24)); 171 std::move(value), Immediate(24));
145 return value; 172 return value;
146 case Register::Size::Short: 173 case Register::Size::Short:
147 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, 174 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
148 Immediate(16)); 175 std::move(value), Immediate(16));
149 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, 176 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
150 Immediate(16)); 177 std::move(value), Immediate(16));
151 case Register::Size::Word: 178 case Register::Size::Word:
152 // Default - do nothing 179 // Default - do nothing
153 return value; 180 return value;
@@ -163,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b
163 return value; 190 return value;
164 } 191 }
165 if (absolute) { 192 if (absolute) {
166 value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); 193 value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
167 } 194 }
168 if (negate) { 195 if (negate) {
169 value = Operation(OperationCode::INegate, NO_PRECISE, value); 196 value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
170 } 197 }
171 return value; 198 return value;
172} 199}
173 200
174Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { 201Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
175 const Node value = Immediate(instr.half_imm.PackImmediates()); 202 Node value = Immediate(instr.half_imm.PackImmediates());
176 if (!has_negation) { 203 if (!has_negation) {
177 return value; 204 return value;
178 } 205 }
179 const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
180 const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
181 206
182 return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); 207 Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
208 Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
209
210 return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
211 std::move(second_negate));
183} 212}
184 213
185Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { 214Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
186 return Operation(OperationCode::HUnpack, type, value); 215 return Operation(OperationCode::HUnpack, type, std::move(value));
187} 216}
188 217
189Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { 218Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
@@ -191,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
191 case Tegra::Shader::HalfMerge::H0_H1: 220 case Tegra::Shader::HalfMerge::H0_H1:
192 return src; 221 return src;
193 case Tegra::Shader::HalfMerge::F32: 222 case Tegra::Shader::HalfMerge::F32:
194 return Operation(OperationCode::HMergeF32, src); 223 return Operation(OperationCode::HMergeF32, std::move(src));
195 case Tegra::Shader::HalfMerge::Mrg_H0: 224 case Tegra::Shader::HalfMerge::Mrg_H0:
196 return Operation(OperationCode::HMergeH0, dest, src); 225 return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
197 case Tegra::Shader::HalfMerge::Mrg_H1: 226 case Tegra::Shader::HalfMerge::Mrg_H1:
198 return Operation(OperationCode::HMergeH1, dest, src); 227 return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
199 } 228 }
200 UNREACHABLE(); 229 UNREACHABLE();
201 return src; 230 return src;
@@ -203,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
203 232
204Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { 233Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
205 if (absolute) { 234 if (absolute) {
206 value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); 235 value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
207 } 236 }
208 if (negate) { 237 if (negate) {
209 value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), 238 value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
210 GetPredicate(true)); 239 GetPredicate(true));
211 } 240 }
212 return value; 241 return value;
@@ -216,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
216 if (!saturate) { 245 if (!saturate) {
217 return value; 246 return value;
218 } 247 }
219 const Node positive_zero = Immediate(std::copysignf(0, 1)); 248
220 const Node positive_one = Immediate(1.0f); 249 Node positive_zero = Immediate(std::copysignf(0, 1));
221 return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); 250 Node positive_one = Immediate(1.0f);
251 return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
252 std::move(positive_one));
222} 253}
223 254
224Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { 255Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
@@ -246,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
246 condition == PredCondition::LessEqualWithNan || 277 condition == PredCondition::LessEqualWithNan ||
247 condition == PredCondition::GreaterThanWithNan || 278 condition == PredCondition::GreaterThanWithNan ||
248 condition == PredCondition::GreaterEqualWithNan) { 279 condition == PredCondition::GreaterEqualWithNan) {
249
250 predicate = Operation(OperationCode::LogicalOr, predicate, 280 predicate = Operation(OperationCode::LogicalOr, predicate,
251 Operation(OperationCode::LogicalFIsNan, op_a)); 281 Operation(OperationCode::LogicalFIsNan, op_a));
252 predicate = Operation(OperationCode::LogicalOr, predicate, 282 predicate = Operation(OperationCode::LogicalOr, predicate,
@@ -275,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
275 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 305 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
276 "Unknown predicate comparison operation"); 306 "Unknown predicate comparison operation");
277 307
278 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b); 308 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
309 std::move(op_b));
279 310
280 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || 311 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
281 condition == PredCondition::NotEqualWithNan || 312 condition == PredCondition::NotEqualWithNan ||
@@ -305,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition
305 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 336 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
306 "Unknown predicate comparison operation"); 337 "Unknown predicate comparison operation");
307 338
308 const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); 339 return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
309
310 return predicate;
311} 340}
312 341
313OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { 342OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
@@ -333,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
333} 362}
334 363
335void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { 364void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
336 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); 365 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
337} 366}
338 367
339void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { 368void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
340 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); 369 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
341} 370}
342 371
343void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { 372void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
344 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); 373 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
345} 374}
346 375
347void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { 376void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
348 bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); 377 bb.push_back(
378 Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
349} 379}
350 380
351void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { 381void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
352 SetRegister(bb, Register::ZeroIndex + 1 + id, value); 382 SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
353} 383}
354 384
355void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { 385void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
356 if (!sets_cc) { 386 if (!sets_cc) {
357 return; 387 return;
358 } 388 }
359 const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); 389 Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f));
360 SetInternalFlag(bb, InternalFlag::Zero, zerop); 390 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
361 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 391 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
362} 392}
363 393
@@ -365,13 +395,18 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_
365 if (!sets_cc) { 395 if (!sets_cc) {
366 return; 396 return;
367 } 397 }
368 const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); 398 Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
369 SetInternalFlag(bb, InternalFlag::Zero, zerop); 399 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
370 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 400 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
371} 401}
372 402
373Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { 403Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
374 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), 404 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
405 Immediate(offset), Immediate(bits));
406}
407
408Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
409 return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset),
375 Immediate(bits)); 410 Immediate(bits));
376} 411}
377 412
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index edcf2288e..62816bd56 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,13 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cstring>
9#include <map> 8#include <map>
10#include <optional> 9#include <optional>
11#include <set> 10#include <set>
12#include <string>
13#include <tuple> 11#include <tuple>
14#include <variant>
15#include <vector> 12#include <vector>
16 13
17#include "common/common_types.h" 14#include "common/common_types.h"
@@ -22,18 +19,12 @@
22 19
23namespace VideoCommon::Shader { 20namespace VideoCommon::Shader {
24 21
22struct ShaderBlock;
23
25using ProgramCode = std::vector<u64>; 24using ProgramCode = std::vector<u64>;
26 25
27constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; 26constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
28 27
29/// Describes the behaviour of code path of a given entry point and a return point.
30enum class ExitMethod {
31 Undetermined, ///< Internal value. Only occur when analyzing JMP loop.
32 AlwaysReturn, ///< All code paths reach the return point.
33 Conditional, ///< Code path reaches the return point or an END instruction conditionally.
34 AlwaysEnd, ///< All code paths reach a END instruction.
35};
36
37class ConstBuffer { 28class ConstBuffer {
38public: 29public:
39 explicit ConstBuffer(u32 max_offset, bool is_indirect) 30 explicit ConstBuffer(u32 max_offset, bool is_indirect)
@@ -73,7 +64,7 @@ struct GlobalMemoryUsage {
73 64
74class ShaderIR final { 65class ShaderIR final {
75public: 66public:
76 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); 67 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size);
77 ~ShaderIR(); 68 ~ShaderIR();
78 69
79 const std::map<u32, NodeBlock>& GetBasicBlocks() const { 70 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -104,6 +95,10 @@ public:
104 return used_samplers; 95 return used_samplers;
105 } 96 }
106 97
98 const std::map<u64, Image>& GetImages() const {
99 return used_images;
100 }
101
107 const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() 102 const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
108 const { 103 const {
109 return used_clip_distances; 104 return used_clip_distances;
@@ -117,6 +112,18 @@ public:
117 return static_cast<std::size_t>(coverage_end * sizeof(u64)); 112 return static_cast<std::size_t>(coverage_end * sizeof(u64));
118 } 113 }
119 114
115 bool UsesLayer() const {
116 return uses_layer;
117 }
118
119 bool UsesViewportIndex() const {
120 return uses_viewport_index;
121 }
122
123 bool UsesPointSize() const {
124 return uses_point_size;
125 }
126
120 bool HasPhysicalAttributes() const { 127 bool HasPhysicalAttributes() const {
121 return uses_physical_attributes; 128 return uses_physical_attributes;
122 } 129 }
@@ -125,12 +132,20 @@ public:
125 return header; 132 return header;
126 } 133 }
127 134
135 bool IsFlowStackDisabled() const {
136 return disable_flow_stack;
137 }
138
139 u32 ConvertAddressToNvidiaSpace(const u32 address) const {
140 return (address - main_offset) * sizeof(Tegra::Shader::Instruction);
141 }
142
128private: 143private:
129 void Decode(); 144 void Decode();
130 145
131 ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);
132
133 NodeBlock DecodeRange(u32 begin, u32 end); 146 NodeBlock DecodeRange(u32 begin, u32 end);
147 void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
148 void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
134 149
135 /** 150 /**
136 * Decodes a single instruction from Tegra to IR. 151 * Decodes a single instruction from Tegra to IR.
@@ -152,8 +167,10 @@ private:
152 u32 DecodeFfma(NodeBlock& bb, u32 pc); 167 u32 DecodeFfma(NodeBlock& bb, u32 pc);
153 u32 DecodeHfma2(NodeBlock& bb, u32 pc); 168 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
154 u32 DecodeConversion(NodeBlock& bb, u32 pc); 169 u32 DecodeConversion(NodeBlock& bb, u32 pc);
170 u32 DecodeWarp(NodeBlock& bb, u32 pc);
155 u32 DecodeMemory(NodeBlock& bb, u32 pc); 171 u32 DecodeMemory(NodeBlock& bb, u32 pc);
156 u32 DecodeTexture(NodeBlock& bb, u32 pc); 172 u32 DecodeTexture(NodeBlock& bb, u32 pc);
173 u32 DecodeImage(NodeBlock& bb, u32 pc);
157 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); 174 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
158 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); 175 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
159 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); 176 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
@@ -191,8 +208,8 @@ private:
191 Node GetInternalFlag(InternalFlag flag, bool negated = false); 208 Node GetInternalFlag(InternalFlag flag, bool negated = false);
192 /// Generates a node representing a local memory address 209 /// Generates a node representing a local memory address
193 Node GetLocalMemory(Node address); 210 Node GetLocalMemory(Node address);
194 /// Generates a temporal, internally it uses a post-RZ register 211 /// Generates a temporary, internally it uses a post-RZ register
195 Node GetTemporal(u32 id); 212 Node GetTemporary(u32 id);
196 213
197 /// Sets a register. src value must be a number-evaluated node. 214 /// Sets a register. src value must be a number-evaluated node.
198 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); 215 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
@@ -202,8 +219,8 @@ private:
202 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); 219 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
203 /// Sets a local memory address. address and value must be a number-evaluated node 220 /// Sets a local memory address. address and value must be a number-evaluated node
204 void SetLocalMemory(NodeBlock& bb, Node address, Node value); 221 void SetLocalMemory(NodeBlock& bb, Node address, Node value);
205 /// Sets a temporal. Internally it uses a post-RZ register 222 /// Sets a temporary. Internally it uses a post-RZ register
206 void SetTemporal(NodeBlock& bb, u32 id, Node value); 223 void SetTemporary(NodeBlock& bb, u32 id, Node value);
207 224
208 /// Sets internal flags from a float 225 /// Sets internal flags from a float
209 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); 226 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
@@ -254,9 +271,24 @@ private:
254 Tegra::Shader::TextureType type, bool is_array, 271 Tegra::Shader::TextureType type, bool is_array,
255 bool is_shadow); 272 bool is_shadow);
256 273
274 /// Accesses an image.
275 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type,
276 std::optional<Tegra::Shader::ImageAtomicSize> size = {});
277
278 /// Access a bindless image sampler.
279 Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type,
280 std::optional<Tegra::Shader::ImageAtomicSize> size = {});
281
282 /// Tries to access an existing image, updating it's state as needed
283 Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type,
284 std::optional<Tegra::Shader::ImageAtomicSize> size);
285
257 /// Extracts a sequence of bits from a node 286 /// Extracts a sequence of bits from a node
258 Node BitfieldExtract(Node value, u32 offset, u32 bits); 287 Node BitfieldExtract(Node value, u32 offset, u32 bits);
259 288
289 /// Inserts a sequence of bits from a node
290 Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
291
260 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, 292 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
261 const Node4& components); 293 const Node4& components);
262 294
@@ -277,6 +309,8 @@ private:
277 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 309 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
278 bool depth_compare, bool is_array, bool is_aoffi); 310 bool depth_compare, bool is_array, bool is_aoffi);
279 311
312 Node4 GetTldCode(Tegra::Shader::Instruction instr);
313
280 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 314 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
281 bool is_array); 315 bool is_array);
282 316
@@ -301,7 +335,7 @@ private:
301 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, 335 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
302 Node op_c, Node imm_lut, bool sets_cc); 336 Node op_c, Node imm_lut, bool sets_cc);
303 337
304 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; 338 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
305 339
306 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; 340 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
307 341
@@ -313,10 +347,11 @@ private:
313 347
314 const ProgramCode& program_code; 348 const ProgramCode& program_code;
315 const u32 main_offset; 349 const u32 main_offset;
350 const std::size_t program_size;
351 bool disable_flow_stack{};
316 352
317 u32 coverage_begin{}; 353 u32 coverage_begin{};
318 u32 coverage_end{}; 354 u32 coverage_end{};
319 std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
320 355
321 std::map<u32, NodeBlock> basic_blocks; 356 std::map<u32, NodeBlock> basic_blocks;
322 NodeBlock global_code; 357 NodeBlock global_code;
@@ -327,8 +362,12 @@ private:
327 std::set<Tegra::Shader::Attribute::Index> used_output_attributes; 362 std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
328 std::map<u32, ConstBuffer> used_cbufs; 363 std::map<u32, ConstBuffer> used_cbufs;
329 std::set<Sampler> used_samplers; 364 std::set<Sampler> used_samplers;
365 std::map<u64, Image> used_images;
330 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; 366 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
331 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; 367 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
368 bool uses_layer{};
369 bool uses_viewport_index{};
370 bool uses_point_size{};
332 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes 371 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
333 372
334 Tegra::Shader::Header header; 373 Tegra::Shader::Header header;
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index fc957d980..55f5949e4 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -15,56 +15,63 @@ namespace {
15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, 15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
16 OperationCode operation_code) { 16 OperationCode operation_code) {
17 for (; cursor >= 0; --cursor) { 17 for (; cursor >= 0; --cursor) {
18 const Node node = code.at(cursor); 18 Node node = code.at(cursor);
19
19 if (const auto operation = std::get_if<OperationNode>(&*node)) { 20 if (const auto operation = std::get_if<OperationNode>(&*node)) {
20 if (operation->GetCode() == operation_code) { 21 if (operation->GetCode() == operation_code) {
21 return {node, cursor}; 22 return {std::move(node), cursor};
22 } 23 }
23 } 24 }
25
24 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { 26 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
25 const auto& conditional_code = conditional->GetCode(); 27 const auto& conditional_code = conditional->GetCode();
26 const auto [found, internal_cursor] = FindOperation( 28 auto [found, internal_cursor] = FindOperation(
27 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); 29 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
28 if (found) { 30 if (found) {
29 return {found, cursor}; 31 return {std::move(found), cursor};
30 } 32 }
31 } 33 }
32 } 34 }
33 return {}; 35 return {};
34} 36}
35} // namespace 37} // Anonymous namespace
36 38
37Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { 39std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
40 s64 cursor) const {
38 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 41 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
39 // Cbuf found, but it has to be immediate 42 // Constant buffer found, test if it's an immediate
40 return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; 43 const auto offset = cbuf->GetOffset();
44 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
45 return {tracked, cbuf->GetIndex(), immediate->GetValue()};
46 }
47 return {};
41 } 48 }
42 if (const auto gpr = std::get_if<GprNode>(&*tracked)) { 49 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
43 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { 50 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
44 return nullptr; 51 return {};
45 } 52 }
46 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same 53 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
47 // register that it uses as operand 54 // register that it uses as operand
48 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); 55 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
49 if (!source) { 56 if (!source) {
50 return nullptr; 57 return {};
51 } 58 }
52 return TrackCbuf(source, code, new_cursor); 59 return TrackCbuf(source, code, new_cursor);
53 } 60 }
54 if (const auto operation = std::get_if<OperationNode>(&*tracked)) { 61 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
55 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { 62 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
56 if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { 63 if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) {
57 // Cbuf found in operand 64 // Cbuf found in operand.
58 return found; 65 return found;
59 } 66 }
60 } 67 }
61 return nullptr; 68 return {};
62 } 69 }
63 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { 70 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
64 const auto& conditional_code = conditional->GetCode(); 71 const auto& conditional_code = conditional->GetCode();
65 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); 72 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
66 } 73 }
67 return nullptr; 74 return {};
68} 75}
69 76
70std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { 77std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 6384fa8d2..4ceb219be 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -12,6 +12,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
12 switch (texture_type) { 12 switch (texture_type) {
13 case Tegra::Texture::TextureType::Texture1D: 13 case Tegra::Texture::TextureType::Texture1D:
14 return SurfaceTarget::Texture1D; 14 return SurfaceTarget::Texture1D;
15 case Tegra::Texture::TextureType::Texture1DBuffer:
16 return SurfaceTarget::TextureBuffer;
15 case Tegra::Texture::TextureType::Texture2D: 17 case Tegra::Texture::TextureType::Texture2D:
16 case Tegra::Texture::TextureType::Texture2DNoMipmap: 18 case Tegra::Texture::TextureType::Texture2DNoMipmap:
17 return SurfaceTarget::Texture2D; 19 return SurfaceTarget::Texture2D;
@@ -35,6 +37,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
35bool SurfaceTargetIsLayered(SurfaceTarget target) { 37bool SurfaceTargetIsLayered(SurfaceTarget target) {
36 switch (target) { 38 switch (target) {
37 case SurfaceTarget::Texture1D: 39 case SurfaceTarget::Texture1D:
40 case SurfaceTarget::TextureBuffer:
38 case SurfaceTarget::Texture2D: 41 case SurfaceTarget::Texture2D:
39 case SurfaceTarget::Texture3D: 42 case SurfaceTarget::Texture3D:
40 return false; 43 return false;
@@ -53,6 +56,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
53bool SurfaceTargetIsArray(SurfaceTarget target) { 56bool SurfaceTargetIsArray(SurfaceTarget target) {
54 switch (target) { 57 switch (target) {
55 case SurfaceTarget::Texture1D: 58 case SurfaceTarget::Texture1D:
59 case SurfaceTarget::TextureBuffer:
56 case SurfaceTarget::Texture2D: 60 case SurfaceTarget::Texture2D:
57 case SurfaceTarget::Texture3D: 61 case SurfaceTarget::Texture3D:
58 case SurfaceTarget::TextureCubemap: 62 case SurfaceTarget::TextureCubemap:
@@ -304,8 +308,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
304 return PixelFormat::Z32F; 308 return PixelFormat::Z32F;
305 case Tegra::Texture::TextureFormat::Z16: 309 case Tegra::Texture::TextureFormat::Z16:
306 return PixelFormat::Z16; 310 return PixelFormat::Z16;
307 case Tegra::Texture::TextureFormat::Z24S8: 311 case Tegra::Texture::TextureFormat::S8Z24:
308 return PixelFormat::Z24S8; 312 return PixelFormat::S8Z24;
309 case Tegra::Texture::TextureFormat::ZF32_X24S8: 313 case Tegra::Texture::TextureFormat::ZF32_X24S8:
310 return PixelFormat::Z32FS8; 314 return PixelFormat::Z32FS8;
311 case Tegra::Texture::TextureFormat::DXT1: 315 case Tegra::Texture::TextureFormat::DXT1:
@@ -441,11 +445,12 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
441 switch (format) { 445 switch (format) {
442 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 446 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
443 return PixelFormat::ABGR8U; 447 return PixelFormat::ABGR8U;
448 case Tegra::FramebufferConfig::PixelFormat::RGB565:
449 return PixelFormat::B5G6R5U;
444 case Tegra::FramebufferConfig::PixelFormat::BGRA8: 450 case Tegra::FramebufferConfig::PixelFormat::BGRA8:
445 return PixelFormat::BGRA8; 451 return PixelFormat::BGRA8;
446 default: 452 default:
447 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 453 UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
448 UNREACHABLE();
449 return PixelFormat::ABGR8U; 454 return PixelFormat::ABGR8U;
450 } 455 }
451} 456}
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index b783e4b27..83f31c12c 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -114,6 +114,7 @@ enum class SurfaceType {
114 114
115enum class SurfaceTarget { 115enum class SurfaceTarget {
116 Texture1D, 116 Texture1D,
117 TextureBuffer,
117 Texture2D, 118 Texture2D,
118 Texture3D, 119 Texture3D,
119 Texture1DArray, 120 Texture1DArray,
@@ -122,71 +123,71 @@ enum class SurfaceTarget {
122 TextureCubeArray, 123 TextureCubeArray,
123}; 124};
124 125
125constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ 126constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
126 1, // ABGR8U 127 0, // ABGR8U
127 1, // ABGR8S 128 0, // ABGR8S
128 1, // ABGR8UI 129 0, // ABGR8UI
129 1, // B5G6R5U 130 0, // B5G6R5U
130 1, // A2B10G10R10U 131 0, // A2B10G10R10U
131 1, // A1B5G5R5U 132 0, // A1B5G5R5U
132 1, // R8U 133 0, // R8U
133 1, // R8UI 134 0, // R8UI
134 1, // RGBA16F 135 0, // RGBA16F
135 1, // RGBA16U 136 0, // RGBA16U
136 1, // RGBA16UI 137 0, // RGBA16UI
137 1, // R11FG11FB10F 138 0, // R11FG11FB10F
138 1, // RGBA32UI 139 0, // RGBA32UI
139 4, // DXT1 140 2, // DXT1
140 4, // DXT23 141 2, // DXT23
141 4, // DXT45 142 2, // DXT45
142 4, // DXN1 143 2, // DXN1
143 4, // DXN2UNORM 144 2, // DXN2UNORM
144 4, // DXN2SNORM 145 2, // DXN2SNORM
145 4, // BC7U 146 2, // BC7U
146 4, // BC6H_UF16 147 2, // BC6H_UF16
147 4, // BC6H_SF16 148 2, // BC6H_SF16
148 4, // ASTC_2D_4X4 149 2, // ASTC_2D_4X4
149 1, // BGRA8 150 0, // BGRA8
150 1, // RGBA32F 151 0, // RGBA32F
151 1, // RG32F 152 0, // RG32F
152 1, // R32F 153 0, // R32F
153 1, // R16F 154 0, // R16F
154 1, // R16U 155 0, // R16U
155 1, // R16S 156 0, // R16S
156 1, // R16UI 157 0, // R16UI
157 1, // R16I 158 0, // R16I
158 1, // RG16 159 0, // RG16
159 1, // RG16F 160 0, // RG16F
160 1, // RG16UI 161 0, // RG16UI
161 1, // RG16I 162 0, // RG16I
162 1, // RG16S 163 0, // RG16S
163 1, // RGB32F 164 0, // RGB32F
164 1, // RGBA8_SRGB 165 0, // RGBA8_SRGB
165 1, // RG8U 166 0, // RG8U
166 1, // RG8S 167 0, // RG8S
167 1, // RG32UI 168 0, // RG32UI
168 1, // R32UI 169 0, // R32UI
169 4, // ASTC_2D_8X8 170 2, // ASTC_2D_8X8
170 4, // ASTC_2D_8X5 171 2, // ASTC_2D_8X5
171 4, // ASTC_2D_5X4 172 2, // ASTC_2D_5X4
172 1, // BGRA8_SRGB 173 0, // BGRA8_SRGB
173 4, // DXT1_SRGB 174 2, // DXT1_SRGB
174 4, // DXT23_SRGB 175 2, // DXT23_SRGB
175 4, // DXT45_SRGB 176 2, // DXT45_SRGB
176 4, // BC7U_SRGB 177 2, // BC7U_SRGB
177 4, // ASTC_2D_4X4_SRGB 178 2, // ASTC_2D_4X4_SRGB
178 4, // ASTC_2D_8X8_SRGB 179 2, // ASTC_2D_8X8_SRGB
179 4, // ASTC_2D_8X5_SRGB 180 2, // ASTC_2D_8X5_SRGB
180 4, // ASTC_2D_5X4_SRGB 181 2, // ASTC_2D_5X4_SRGB
181 4, // ASTC_2D_5X5 182 2, // ASTC_2D_5X5
182 4, // ASTC_2D_5X5_SRGB 183 2, // ASTC_2D_5X5_SRGB
183 4, // ASTC_2D_10X8 184 2, // ASTC_2D_10X8
184 4, // ASTC_2D_10X8_SRGB 185 2, // ASTC_2D_10X8_SRGB
185 1, // Z32F 186 0, // Z32F
186 1, // Z16 187 0, // Z16
187 1, // Z24S8 188 0, // Z24S8
188 1, // S8Z24 189 0, // S8Z24
189 1, // Z32FS8 190 0, // Z32FS8
190}}; 191}};
191 192
192/** 193/**
@@ -195,12 +196,14 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
195 * compressed image. This is used for maintaining proper surface sizes for compressed 196 * compressed image. This is used for maintaining proper surface sizes for compressed
196 * texture formats. 197 * texture formats.
197 */ 198 */
198static constexpr u32 GetCompressionFactor(PixelFormat format) { 199inline constexpr u32 GetCompressionFactorShift(PixelFormat format) {
199 if (format == PixelFormat::Invalid) 200 DEBUG_ASSERT(format != PixelFormat::Invalid);
200 return 0; 201 DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size());
202 return compression_factor_shift_table[static_cast<std::size_t>(format)];
203}
201 204
202 ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size()); 205inline constexpr u32 GetCompressionFactor(PixelFormat format) {
203 return compression_factor_table[static_cast<std::size_t>(format)]; 206 return 1U << GetCompressionFactorShift(format);
204} 207}
205 208
206constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 209constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
@@ -436,6 +439,88 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
436 return GetFormatBpp(pixel_format) / CHAR_BIT; 439 return GetFormatBpp(pixel_format) / CHAR_BIT;
437} 440}
438 441
442enum class SurfaceCompression {
443 None, // Not compressed
444 Compressed, // Texture is compressed
445 Converted, // Texture is converted before upload or after download
446 Rearranged, // Texture is swizzled before upload or after download
447};
448
449constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{
450 SurfaceCompression::None, // ABGR8U
451 SurfaceCompression::None, // ABGR8S
452 SurfaceCompression::None, // ABGR8UI
453 SurfaceCompression::None, // B5G6R5U
454 SurfaceCompression::None, // A2B10G10R10U
455 SurfaceCompression::None, // A1B5G5R5U
456 SurfaceCompression::None, // R8U
457 SurfaceCompression::None, // R8UI
458 SurfaceCompression::None, // RGBA16F
459 SurfaceCompression::None, // RGBA16U
460 SurfaceCompression::None, // RGBA16UI
461 SurfaceCompression::None, // R11FG11FB10F
462 SurfaceCompression::None, // RGBA32UI
463 SurfaceCompression::Compressed, // DXT1
464 SurfaceCompression::Compressed, // DXT23
465 SurfaceCompression::Compressed, // DXT45
466 SurfaceCompression::Compressed, // DXN1
467 SurfaceCompression::Compressed, // DXN2UNORM
468 SurfaceCompression::Compressed, // DXN2SNORM
469 SurfaceCompression::Compressed, // BC7U
470 SurfaceCompression::Compressed, // BC6H_UF16
471 SurfaceCompression::Compressed, // BC6H_SF16
472 SurfaceCompression::Converted, // ASTC_2D_4X4
473 SurfaceCompression::None, // BGRA8
474 SurfaceCompression::None, // RGBA32F
475 SurfaceCompression::None, // RG32F
476 SurfaceCompression::None, // R32F
477 SurfaceCompression::None, // R16F
478 SurfaceCompression::None, // R16U
479 SurfaceCompression::None, // R16S
480 SurfaceCompression::None, // R16UI
481 SurfaceCompression::None, // R16I
482 SurfaceCompression::None, // RG16
483 SurfaceCompression::None, // RG16F
484 SurfaceCompression::None, // RG16UI
485 SurfaceCompression::None, // RG16I
486 SurfaceCompression::None, // RG16S
487 SurfaceCompression::None, // RGB32F
488 SurfaceCompression::None, // RGBA8_SRGB
489 SurfaceCompression::None, // RG8U
490 SurfaceCompression::None, // RG8S
491 SurfaceCompression::None, // RG32UI
492 SurfaceCompression::None, // R32UI
493 SurfaceCompression::Converted, // ASTC_2D_8X8
494 SurfaceCompression::Converted, // ASTC_2D_8X5
495 SurfaceCompression::Converted, // ASTC_2D_5X4
496 SurfaceCompression::None, // BGRA8_SRGB
497 SurfaceCompression::Compressed, // DXT1_SRGB
498 SurfaceCompression::Compressed, // DXT23_SRGB
499 SurfaceCompression::Compressed, // DXT45_SRGB
500 SurfaceCompression::Compressed, // BC7U_SRGB
501 SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB
502 SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB
503 SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB
504 SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB
505 SurfaceCompression::Converted, // ASTC_2D_5X5
506 SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB
507 SurfaceCompression::Converted, // ASTC_2D_10X8
508 SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB
509 SurfaceCompression::None, // Z32F
510 SurfaceCompression::None, // Z16
511 SurfaceCompression::None, // Z24S8
512 SurfaceCompression::Rearranged, // S8Z24
513 SurfaceCompression::None, // Z32FS8
514}};
515
516constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) {
517 if (format == PixelFormat::Invalid) {
518 return SurfaceCompression::None;
519 }
520 DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size());
521 return compression_type_table[static_cast<std::size_t>(format)];
522}
523
439SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); 524SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);
440 525
441bool SurfaceTargetIsLayered(SurfaceTarget target); 526bool SurfaceTargetIsLayered(SurfaceTarget target);
diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp
deleted file mode 100644
index e96eba7cc..000000000
--- a/src/video_core/texture_cache.cpp
+++ /dev/null
@@ -1,386 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/alignment.h"
6#include "common/assert.h"
7#include "common/cityhash.h"
8#include "common/common_types.h"
9#include "core/core.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache.h"
12#include "video_core/textures/decoders.h"
13#include "video_core/textures/texture.h"
14
15namespace VideoCommon {
16
17using VideoCore::Surface::SurfaceTarget;
18
19using VideoCore::Surface::ComponentTypeFromDepthFormat;
20using VideoCore::Surface::ComponentTypeFromRenderTarget;
21using VideoCore::Surface::ComponentTypeFromTexture;
22using VideoCore::Surface::PixelFormatFromDepthFormat;
23using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
24using VideoCore::Surface::PixelFormatFromTextureFormat;
25using VideoCore::Surface::SurfaceTargetFromTextureType;
26
27constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
28 return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
29}
30
31SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
32 const Tegra::Texture::FullTextureInfo& config) {
33 SurfaceParams params;
34 params.is_tiled = config.tic.IsTiled();
35 params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
36 params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
37 params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
38 params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
39 params.pixel_format =
40 PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false);
41 params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
42 params.type = GetFormatType(params.pixel_format);
43 params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
44 params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
45 params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
46 params.depth = config.tic.Depth();
47 if (params.target == SurfaceTarget::TextureCubemap ||
48 params.target == SurfaceTarget::TextureCubeArray) {
49 params.depth *= 6;
50 }
51 params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
52 params.unaligned_height = config.tic.Height();
53 params.num_levels = config.tic.max_mip_level + 1;
54
55 params.CalculateCachedValues();
56 return params;
57}
58
59SurfaceParams SurfaceParams::CreateForDepthBuffer(
60 Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
61 u32 block_width, u32 block_height, u32 block_depth,
62 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
63 SurfaceParams params;
64 params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
65 params.block_width = 1 << std::min(block_width, 5U);
66 params.block_height = 1 << std::min(block_height, 5U);
67 params.block_depth = 1 << std::min(block_depth, 5U);
68 params.tile_width_spacing = 1;
69 params.pixel_format = PixelFormatFromDepthFormat(format);
70 params.component_type = ComponentTypeFromDepthFormat(format);
71 params.type = GetFormatType(params.pixel_format);
72 params.width = zeta_width;
73 params.height = zeta_height;
74 params.unaligned_height = zeta_height;
75 params.target = SurfaceTarget::Texture2D;
76 params.depth = 1;
77 params.num_levels = 1;
78
79 params.CalculateCachedValues();
80 return params;
81}
82
83SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
84 const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
85 SurfaceParams params;
86 params.is_tiled =
87 config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
88 params.block_width = 1 << config.memory_layout.block_width;
89 params.block_height = 1 << config.memory_layout.block_height;
90 params.block_depth = 1 << config.memory_layout.block_depth;
91 params.tile_width_spacing = 1;
92 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
93 params.component_type = ComponentTypeFromRenderTarget(config.format);
94 params.type = GetFormatType(params.pixel_format);
95 if (params.is_tiled) {
96 params.width = config.width;
97 } else {
98 const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
99 params.pitch = config.width;
100 params.width = params.pitch / bpp;
101 }
102 params.height = config.height;
103 params.depth = 1;
104 params.unaligned_height = config.height;
105 params.target = SurfaceTarget::Texture2D;
106 params.num_levels = 1;
107
108 params.CalculateCachedValues();
109 return params;
110}
111
112SurfaceParams SurfaceParams::CreateForFermiCopySurface(
113 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
114 SurfaceParams params{};
115 params.is_tiled = !config.linear;
116 params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
117 params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
118 params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
119 params.tile_width_spacing = 1;
120 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
121 params.component_type = ComponentTypeFromRenderTarget(config.format);
122 params.type = GetFormatType(params.pixel_format);
123 params.width = config.width;
124 params.height = config.height;
125 params.unaligned_height = config.height;
126 // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
127 params.target = SurfaceTarget::Texture2D;
128 params.depth = 1;
129 params.num_levels = 1;
130
131 params.CalculateCachedValues();
132 return params;
133}
134
135u32 SurfaceParams::GetMipWidth(u32 level) const {
136 return std::max(1U, width >> level);
137}
138
139u32 SurfaceParams::GetMipHeight(u32 level) const {
140 return std::max(1U, height >> level);
141}
142
143u32 SurfaceParams::GetMipDepth(u32 level) const {
144 return IsLayered() ? depth : std::max(1U, depth >> level);
145}
146
147bool SurfaceParams::IsLayered() const {
148 switch (target) {
149 case SurfaceTarget::Texture1DArray:
150 case SurfaceTarget::Texture2DArray:
151 case SurfaceTarget::TextureCubeArray:
152 case SurfaceTarget::TextureCubemap:
153 return true;
154 default:
155 return false;
156 }
157}
158
159u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
160 // Auto block resizing algorithm from:
161 // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
162 if (level == 0) {
163 return block_height;
164 }
165 const u32 height{GetMipHeight(level)};
166 const u32 default_block_height{GetDefaultBlockHeight(pixel_format)};
167 const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height};
168 u32 block_height = 16;
169 while (block_height > 1 && blocks_in_y <= block_height * 4) {
170 block_height >>= 1;
171 }
172 return block_height;
173}
174
175u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
176 if (level == 0)
177 return block_depth;
178 if (target != SurfaceTarget::Texture3D)
179 return 1;
180
181 const u32 depth{GetMipDepth(level)};
182 u32 block_depth = 32;
183 while (block_depth > 1 && depth * 2 <= block_depth) {
184 block_depth >>= 1;
185 }
186 if (block_depth == 32 && GetMipBlockHeight(level) >= 4) {
187 return 16;
188 }
189 return block_depth;
190}
191
192std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
193 std::size_t offset = 0;
194 for (u32 i = 0; i < level; i++) {
195 offset += GetInnerMipmapMemorySize(i, false, IsLayered(), false);
196 }
197 return offset;
198}
199
200std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
201 std::size_t offset = 0;
202 for (u32 i = 0; i < level; i++) {
203 offset += GetInnerMipmapMemorySize(i, true, false, false);
204 }
205 return offset;
206}
207
208std::size_t SurfaceParams::GetGuestLayerSize() const {
209 return GetInnerMemorySize(false, true, false);
210}
211
212std::size_t SurfaceParams::GetHostLayerSize(u32 level) const {
213 return GetInnerMipmapMemorySize(level, true, IsLayered(), false);
214}
215
216bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const {
217 if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) !=
218 std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format,
219 view_params.component_type, view_params.type)) {
220 return false;
221 }
222
223 const SurfaceTarget view_target{view_params.target};
224 if (view_target == target) {
225 return true;
226 }
227
228 switch (target) {
229 case SurfaceTarget::Texture1D:
230 case SurfaceTarget::Texture2D:
231 case SurfaceTarget::Texture3D:
232 return false;
233 case SurfaceTarget::Texture1DArray:
234 return view_target == SurfaceTarget::Texture1D;
235 case SurfaceTarget::Texture2DArray:
236 return view_target == SurfaceTarget::Texture2D;
237 case SurfaceTarget::TextureCubemap:
238 return view_target == SurfaceTarget::Texture2D ||
239 view_target == SurfaceTarget::Texture2DArray;
240 case SurfaceTarget::TextureCubeArray:
241 return view_target == SurfaceTarget::Texture2D ||
242 view_target == SurfaceTarget::Texture2DArray ||
243 view_target == SurfaceTarget::TextureCubemap;
244 default:
245 UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast<u32>(target));
246 return false;
247 }
248}
249
250bool SurfaceParams::IsPixelFormatZeta() const {
251 return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
252 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
253}
254
255void SurfaceParams::CalculateCachedValues() {
256 guest_size_in_bytes = GetInnerMemorySize(false, false, false);
257
258 // ASTC is uncompressed in software, in emulated as RGBA8
259 if (IsPixelFormatASTC(pixel_format)) {
260 host_size_in_bytes = width * height * depth * 4;
261 } else {
262 host_size_in_bytes = GetInnerMemorySize(true, false, false);
263 }
264
265 switch (target) {
266 case SurfaceTarget::Texture1D:
267 case SurfaceTarget::Texture2D:
268 case SurfaceTarget::Texture3D:
269 num_layers = 1;
270 break;
271 case SurfaceTarget::Texture1DArray:
272 case SurfaceTarget::Texture2DArray:
273 case SurfaceTarget::TextureCubemap:
274 case SurfaceTarget::TextureCubeArray:
275 num_layers = depth;
276 break;
277 default:
278 UNREACHABLE();
279 }
280}
281
282std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
283 bool uncompressed) const {
284 const bool tiled{as_host_size ? false : is_tiled};
285 const u32 tile_x{GetDefaultBlockWidth(pixel_format)};
286 const u32 tile_y{GetDefaultBlockHeight(pixel_format)};
287 const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), tile_x)};
288 const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), tile_y)};
289 const u32 depth{layer_only ? 1U : GetMipDepth(level)};
290 return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(pixel_format), width, height,
291 depth, GetMipBlockHeight(level), GetMipBlockDepth(level));
292}
293
294std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only,
295 bool uncompressed) const {
296 std::size_t size = 0;
297 for (u32 level = 0; level < num_levels; ++level) {
298 size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed);
299 }
300 if (!as_host_size && is_tiled) {
301 size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth);
302 }
303 return size;
304}
305
306std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const {
307 std::map<u64, std::pair<u32, u32>> view_offset_map;
308 switch (target) {
309 case SurfaceTarget::Texture1D:
310 case SurfaceTarget::Texture2D:
311 case SurfaceTarget::Texture3D: {
312 constexpr u32 layer = 0;
313 for (u32 level = 0; level < num_levels; ++level) {
314 const std::size_t offset{GetGuestMipmapLevelOffset(level)};
315 view_offset_map.insert({offset, {layer, level}});
316 }
317 break;
318 }
319 case SurfaceTarget::Texture1DArray:
320 case SurfaceTarget::Texture2DArray:
321 case SurfaceTarget::TextureCubemap:
322 case SurfaceTarget::TextureCubeArray: {
323 const std::size_t layer_size{GetGuestLayerSize()};
324 for (u32 level = 0; level < num_levels; ++level) {
325 const std::size_t level_offset{GetGuestMipmapLevelOffset(level)};
326 for (u32 layer = 0; layer < num_layers; ++layer) {
327 const auto layer_offset{static_cast<std::size_t>(layer_size * layer)};
328 const std::size_t offset{level_offset + layer_offset};
329 view_offset_map.insert({offset, {layer, level}});
330 }
331 }
332 break;
333 }
334 default:
335 UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast<u32>(target));
336 }
337 return view_offset_map;
338}
339
340bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const {
341 return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) &&
342 IsInBounds(view_params, layer, level);
343}
344
345bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const {
346 return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level);
347}
348
349bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const {
350 if (view_params.target != SurfaceTarget::Texture3D) {
351 return true;
352 }
353 return view_params.depth == GetMipDepth(level);
354}
355
356bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const {
357 return layer + view_params.num_layers <= num_layers &&
358 level + view_params.num_levels <= num_levels;
359}
360
std::size_t HasheableSurfaceParams::Hash() const {
    // Hashes the raw object representation with CityHash64.
    // NOTE(review): sizeof(*this) includes any padding bytes between members, so this relies
    // on instances being value-initialized (e.g. SurfaceParams params{}) before members are
    // filled in — confirm that all construction sites zero-initialize.
    return static_cast<std::size_t>(
        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
}
365
366bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const {
367 return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
368 height, depth, pitch, unaligned_height, num_levels, pixel_format,
369 component_type, type, target) ==
370 std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
371 rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
372 rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type,
373 rhs.type, rhs.target);
374}
375
std::size_t ViewKey::Hash() const {
    // Hashes the raw object representation with CityHash64. All members are u32 and
    // default-initialized to zero, so the byte image is fully determined by the fields.
    return static_cast<std::size_t>(
        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
}
380
381bool ViewKey::operator==(const ViewKey& rhs) const {
382 return std::tie(base_layer, num_layers, base_level, num_levels) ==
383 std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels);
384}
385
386} // namespace VideoCommon
diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h
deleted file mode 100644
index 041551691..000000000
--- a/src/video_core/texture_cache.h
+++ /dev/null
@@ -1,586 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <list>
8#include <memory>
9#include <set>
10#include <tuple>
11#include <type_traits>
12#include <unordered_map>
13
14#include <boost/icl/interval_map.hpp>
15#include <boost/range/iterator_range.hpp>
16
17#include "common/assert.h"
18#include "common/common_types.h"
19#include "core/memory.h"
20#include "video_core/engines/fermi_2d.h"
21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/gpu.h"
23#include "video_core/rasterizer_interface.h"
24#include "video_core/surface.h"
25
26namespace Core {
27class System;
28}
29
30namespace Tegra::Texture {
31struct FullTextureInfo;
32}
33
34namespace VideoCore {
35class RasterizerInterface;
36}
37
38namespace VideoCommon {
39
/// Surface identity parameters that participate in hashing and equality.
/// Hash() reads the raw byte image of this object (see the .cpp), so construction must
/// zero-initialize the whole object before members are assigned.
class HasheableSurfaceParams {
public:
    /// Hashes the raw object representation of these parameters.
    std::size_t Hash() const;

    /// Member-wise equality over all parameters below.
    bool operator==(const HasheableSurfaceParams& rhs) const;

protected:
    // Avoid creation outside of a managed environment.
    HasheableSurfaceParams() = default;

    bool is_tiled;           // True for block-linear (tiled) guest layout, false for pitch-linear
    u32 block_width;
    u32 block_height;
    u32 block_depth;
    u32 tile_width_spacing;
    u32 width;
    u32 height;
    u32 depth;               // Layer count for layered targets (see SurfaceParams::IsLayered)
    u32 pitch;               // Row pitch in bytes; only meaningful for linear surfaces
    u32 unaligned_height;
    u32 num_levels;          // Number of mipmap levels
    VideoCore::Surface::PixelFormat pixel_format;
    VideoCore::Surface::ComponentType component_type;
    VideoCore::Surface::SurfaceType type;
    VideoCore::Surface::SurfaceTarget target;
};
66
/// Full surface description: the hashed identity parameters plus values derived from them
/// (guest/host sizes and layer count), with factories for each engine configuration source.
class SurfaceParams final : public HasheableSurfaceParams {
public:
    /// Creates SurfaceCachedParams from a texture configuration.
    static SurfaceParams CreateForTexture(Core::System& system,
                                          const Tegra::Texture::FullTextureInfo& config);

    /// Creates SurfaceCachedParams for a depth buffer configuration.
    static SurfaceParams CreateForDepthBuffer(
        Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
        u32 block_width, u32 block_height, u32 block_depth,
        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);

    /// Creates SurfaceCachedParams from a framebuffer configuration.
    static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);

    /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
    static SurfaceParams CreateForFermiCopySurface(
        const Tegra::Engines::Fermi2D::Regs::Surface& config);

    // Trivial accessors over the inherited identity parameters.

    bool IsTiled() const {
        return is_tiled;
    }

    u32 GetBlockWidth() const {
        return block_width;
    }

    u32 GetTileWidthSpacing() const {
        return tile_width_spacing;
    }

    u32 GetWidth() const {
        return width;
    }

    u32 GetHeight() const {
        return height;
    }

    u32 GetDepth() const {
        return depth;
    }

    u32 GetPitch() const {
        return pitch;
    }

    u32 GetNumLevels() const {
        return num_levels;
    }

    VideoCore::Surface::PixelFormat GetPixelFormat() const {
        return pixel_format;
    }

    VideoCore::Surface::ComponentType GetComponentType() const {
        return component_type;
    }

    VideoCore::Surface::SurfaceTarget GetTarget() const {
        return target;
    }

    VideoCore::Surface::SurfaceType GetType() const {
        return type;
    }

    // Accessors over values cached by CalculateCachedValues().

    std::size_t GetGuestSizeInBytes() const {
        return guest_size_in_bytes;
    }

    std::size_t GetHostSizeInBytes() const {
        return host_size_in_bytes;
    }

    u32 GetNumLayers() const {
        return num_layers;
    }

    /// Returns the width of a given mipmap level.
    u32 GetMipWidth(u32 level) const;

    /// Returns the height of a given mipmap level.
    u32 GetMipHeight(u32 level) const;

    /// Returns the depth of a given mipmap level.
    u32 GetMipDepth(u32 level) const;

    /// Returns true if these parameters are from a layered surface.
    bool IsLayered() const;

    /// Returns the block height of a given mipmap level.
    u32 GetMipBlockHeight(u32 level) const;

    /// Returns the block depth of a given mipmap level.
    u32 GetMipBlockDepth(u32 level) const;

    /// Returns the offset in bytes in guest memory of a given mipmap level.
    std::size_t GetGuestMipmapLevelOffset(u32 level) const;

    /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
    std::size_t GetHostMipmapLevelOffset(u32 level) const;

    /// Returns the size of a layer in bytes in guest memory.
    std::size_t GetGuestLayerSize() const;

    /// Returns the size of a layer in bytes in host memory for a given mipmap level.
    std::size_t GetHostLayerSize(u32 level) const;

    /// Returns true if another surface can be familiar with this. This is a loosely defined term
    /// that reflects the possibility of these two surface parameters potentially being part of a
    /// bigger superset.
    bool IsFamiliar(const SurfaceParams& view_params) const;

    /// Returns true if the pixel format is a depth and/or stencil format.
    bool IsPixelFormatZeta() const;

    /// Creates a map that redirects an address difference to a layer and mipmap level.
    std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const;

    /// Returns true if the passed surface view parameters is equal or a valid subset of this.
    bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const;

private:
    /// Calculates values that can be deduced from HasheableSurfaceParams.
    void CalculateCachedValues();

    /// Returns the size of a given mipmap level.
    std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
                                         bool uncompressed) const;

    /// Returns the size of all mipmap levels and aligns as needed.
    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const;

    /// Returns true if the passed view width and height match the size of this params in a given
    /// mipmap level.
    bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const;

    /// Returns true if the passed view depth match the size of this params in a given mipmap level.
    bool IsDepthValid(const SurfaceParams& view_params, u32 level) const;

    /// Returns true if the passed view layers and mipmap levels are in bounds.
    bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const;

    // Values derived from the identity parameters; see CalculateCachedValues().
    std::size_t guest_size_in_bytes;
    std::size_t host_size_in_bytes;
    u32 num_layers;
};
215
/// Identifies a view inside a surface: a window of layers and mipmap levels.
/// Used as the key of SurfaceBase's view cache.
struct ViewKey {
    /// Hashes the raw object representation of this key.
    std::size_t Hash() const;

    /// Member-wise equality.
    bool operator==(const ViewKey& rhs) const;

    u32 base_layer{};
    u32 num_layers{};
    u32 base_level{};
    u32 num_levels{};
};
226
227} // namespace VideoCommon
228
229namespace std {
230
/// std::hash specialization delegating to SurfaceParams::Hash().
template <>
struct hash<VideoCommon::SurfaceParams> {
    std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
        return k.Hash();
    }
};
237
/// std::hash specialization delegating to ViewKey::Hash().
template <>
struct hash<VideoCommon::ViewKey> {
    std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept {
        return k.Hash();
    }
};
244
245} // namespace std
246
247namespace VideoCommon {
248
/// Base class for a cached surface. Owns the surface parameters, the CPU/host address
/// bookkeeping, and a cache of views (layer/level windows) into the surface. Backends
/// implement the pure virtuals to move data between guest memory and the host texture.
template <typename TView, typename TExecutionContext>
class SurfaceBase {
    static_assert(std::is_trivially_copyable_v<TExecutionContext>);

public:
    /// Backend hook: reads the surface's guest data into a backend-owned buffer.
    virtual void LoadBuffer() = 0;

    /// Backend hook: writes the host texture contents back to guest memory.
    virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0;

    /// Backend hook: uploads the previously loaded buffer to the host texture.
    virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0;

    /// Returns a view matching view_params at view_addr, or null when no valid view exists.
    TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) {
        if (view_addr < cpu_addr || !params.IsFamiliar(view_params)) {
            // It can't be a view if it's in a prior address.
            return {};
        }

        // Translate the address difference into a (layer, level) pair.
        const auto relative_offset{static_cast<u64>(view_addr - cpu_addr)};
        const auto it{view_offset_map.find(relative_offset)};
        if (it == view_offset_map.end()) {
            // Couldn't find an aligned view.
            return {};
        }
        const auto [layer, level] = it->second;

        if (!params.IsViewValid(view_params, layer, level)) {
            return {};
        }

        return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels());
    }

    // The address accessors assert registration: these fields are only valid while registered.

    VAddr GetCpuAddr() const {
        ASSERT(is_registered);
        return cpu_addr;
    }

    u8* GetHostPtr() const {
        ASSERT(is_registered);
        return host_ptr;
    }

    CacheAddr GetCacheAddr() const {
        ASSERT(is_registered);
        return cache_addr;
    }

    std::size_t GetSizeInBytes() const {
        return params.GetGuestSizeInBytes();
    }

    void MarkAsModified(bool is_modified_) {
        is_modified = is_modified_;
    }

    const SurfaceParams& GetSurfaceParams() const {
        return params;
    }

    /// Like TryGetView, but asserts that a valid view exists.
    TView* GetView(VAddr view_addr, const SurfaceParams& view_params) {
        TView* view{TryGetView(view_addr, view_params)};
        ASSERT(view != nullptr);
        return view;
    }

    /// Binds this surface to a CPU address and its host pointer.
    void Register(VAddr cpu_addr_, u8* host_ptr_) {
        ASSERT(!is_registered);
        is_registered = true;
        cpu_addr = cpu_addr_;
        host_ptr = host_ptr_;
        cache_addr = ToCacheAddr(host_ptr_);
    }

    /// Convenience overload resolving the host pointer from the CPU address.
    void Register(VAddr cpu_addr_) {
        Register(cpu_addr_, Memory::GetPointer(cpu_addr_));
    }

    void Unregister() {
        ASSERT(is_registered);
        is_registered = false;
    }

    bool IsRegistered() const {
        return is_registered;
    }

protected:
    explicit SurfaceBase(const SurfaceParams& params)
        : params{params}, view_offset_map{params.CreateViewOffsetMap()} {}

    ~SurfaceBase() = default;

    /// Backend hook: creates a concrete view for the given key.
    virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0;

    bool IsModified() const {
        return is_modified;
    }

    const SurfaceParams params;

private:
    /// Looks up (or lazily creates) the view for the given layer/level window.
    TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) {
        const ViewKey key{base_layer, num_layers, base_level, num_levels};
        const auto [entry, is_cache_miss] = views.try_emplace(key);
        auto& view{entry->second};
        if (is_cache_miss) {
            view = CreateView(key);
        }
        return view.get();
    }

    // Maps a relative guest byte offset to its (layer, level) pair; built once at construction.
    const std::map<u64, std::pair<u32, u32>> view_offset_map;

    VAddr cpu_addr{};
    u8* host_ptr{};
    CacheAddr cache_addr{};
    bool is_modified{};
    bool is_registered{};
    std::unordered_map<ViewKey, std::unique_ptr<TView>> views;
};
369
/// Generic texture cache keyed by cache (host) addresses. Tracks registered surfaces in an
/// interval map for overlap queries and keeps a "reserve" of previously used surfaces so they
/// can be recycled instead of recreated. Backends implement surface/view creation and the
/// fast-path view recovery.
template <typename TSurface, typename TView, typename TExecutionContext>
class TextureCache {
    static_assert(std::is_trivially_copyable_v<TExecutionContext>);
    using ResultType = std::tuple<TView*, TExecutionContext>;
    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>;
    using IntervalType = typename IntervalMap::interval_type;

public:
    /// Unregisters every surface overlapping the given region.
    void InvalidateRegion(CacheAddr addr, std::size_t size) {
        for (TSurface* surface : GetSurfacesInRegion(addr, size)) {
            if (!surface->IsRegistered()) {
                // Skip duplicates
                continue;
            }
            Unregister(surface);
        }
    }

    /// Returns a view for the given texture configuration, or null on unmapped addresses.
    ResultType GetTextureSurface(TExecutionContext exctx,
                                 const Tegra::Texture::FullTextureInfo& config) {
        auto& memory_manager{system.GPU().MemoryManager()};
        const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())};
        if (!cpu_addr) {
            return {{}, exctx};
        }
        const auto params{SurfaceParams::CreateForTexture(system, config)};
        return GetSurfaceView(exctx, *cpu_addr, params, true);
    }

    /// Returns a view for the currently bound depth buffer, or null when zeta is disabled.
    ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) {
        const auto& regs{system.GPU().Maxwell3D().regs};
        if (!regs.zeta.Address() || !regs.zeta_enable) {
            return {{}, exctx};
        }

        auto& memory_manager{system.GPU().MemoryManager()};
        const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())};
        if (!cpu_addr) {
            return {{}, exctx};
        }

        const auto depth_params{SurfaceParams::CreateForDepthBuffer(
            system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
            regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
        return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents);
    }

    /// Returns a view for the indexed render target, or null when it is disabled or unmapped.
    ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index,
                                     bool preserve_contents) {
        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);

        const auto& regs{system.GPU().Maxwell3D().regs};
        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
            regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
            return {{}, exctx};
        }

        auto& memory_manager{system.GPU().MemoryManager()};
        const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
        // Skip past the base layer so the view starts at the addressed sub-resource.
        const auto cpu_addr{memory_manager.GpuToCpuAddress(
            config.Address() + config.base_layer * config.layer_stride * sizeof(u32))};
        if (!cpu_addr) {
            return {{}, exctx};
        }

        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                              preserve_contents);
    }

    /// Returns a view for a Fermi2D copy-engine surface; asserts the address is mapped.
    ResultType GetFermiSurface(TExecutionContext exctx,
                               const Tegra::Engines::Fermi2D::Regs::Surface& config) {
        const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())};
        ASSERT(cpu_addr);
        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config),
                              true);
    }

    /// Finds a registered surface starting exactly at host_ptr, or null.
    TSurface* TryFindFramebufferSurface(const u8* host_ptr) const {
        const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))};
        return it != registered_surfaces.end() ? *it->second.begin() : nullptr;
    }

protected:
    TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
        : system{system}, rasterizer{rasterizer} {}

    ~TextureCache() = default;

    /// Backend hook: tries to build a view from existing overlapping surfaces without a reload.
    virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
                                             const SurfaceParams& params, bool preserve_contents,
                                             const std::vector<TSurface*>& overlaps) = 0;

    /// Backend hook: creates a concrete surface for the given parameters.
    virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0;

    /// Registers a surface at an address and updates the rasterizer's cached-page counters.
    void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) {
        surface->Register(cpu_addr, host_ptr);
        registered_surfaces.add({GetSurfaceInterval(surface), {surface}});
        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1);
    }

    /// Removes a surface from the interval map and decrements the cached-page counters.
    void Unregister(TSurface* surface) {
        registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}});
        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1);
        surface->Unregister();
    }

    /// Returns a recycled surface from the reserve or creates (and reserves) a new one.
    TSurface* GetUncachedSurface(const SurfaceParams& params) {
        if (TSurface* surface = TryGetReservedSurface(params); surface)
            return surface;
        // No reserved surface available, create a new one and reserve it
        auto new_surface{CreateSurface(params)};
        TSurface* surface{new_surface.get()};
        ReserveSurface(params, std::move(new_surface));
        return surface;
    }

    Core::System& system;

private:
    /// Core lookup: resolves a (cpu_addr, params) request against the registered surfaces.
    ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params,
                              bool preserve_contents) {
        const auto host_ptr{Memory::GetPointer(cpu_addr)};
        const auto cache_addr{ToCacheAddr(host_ptr)};
        const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())};
        if (overlaps.empty()) {
            return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
        }

        // Single overlap: it may already contain a matching view.
        if (overlaps.size() == 1) {
            if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view)
                return {view, exctx};
        }

        TView* fast_view;
        std::tie(fast_view, exctx) =
            TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps);

        for (TSurface* surface : overlaps) {
            if (!fast_view) {
                // Flush even when we don't care about the contents, to preserve memory not written
                // by the new surface.
                exctx = surface->FlushBuffer(exctx);
            }
            Unregister(surface);
        }

        if (fast_view) {
            return {fast_view, exctx};
        }

        return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
    }

    /// Registers a fresh surface and, if requested, populates it from guest memory.
    ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
                               const SurfaceParams& params, bool preserve_contents) {
        TSurface* new_surface{GetUncachedSurface(params)};
        Register(new_surface, cpu_addr, host_ptr);
        if (preserve_contents) {
            exctx = LoadSurface(exctx, new_surface);
        }
        return {new_surface->GetView(cpu_addr, params), exctx};
    }

    /// Loads guest data into the surface and uploads it to the host texture.
    TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) {
        surface->LoadBuffer();
        exctx = surface->UploadTexture(exctx);
        surface->MarkAsModified(false);
        return exctx;
    }

    /// Collects all registered surfaces overlapping [cache_addr, cache_addr + size).
    std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const {
        if (size == 0) {
            return {};
        }
        const IntervalType interval{cache_addr, cache_addr + size};

        std::vector<TSurface*> surfaces;
        for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) {
            surfaces.push_back(*pair.second.begin());
        }
        return surfaces;
    }

    /// Stores a surface in the reserve so it can be recycled later.
    void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) {
        surface_reserve[params].push_back(std::move(surface));
    }

    /// Returns an unregistered reserved surface matching params, or null.
    TSurface* TryGetReservedSurface(const SurfaceParams& params) {
        auto search{surface_reserve.find(params)};
        if (search == surface_reserve.end()) {
            return {};
        }
        for (auto& surface : search->second) {
            if (!surface->IsRegistered()) {
                return surface.get();
            }
        }
        return {};
    }

    /// Builds the half-open cache-address interval covered by a surface.
    IntervalType GetSurfaceInterval(TSurface* surface) const {
        return IntervalType::right_open(surface->GetCacheAddr(),
                                        surface->GetCacheAddr() + surface->GetSizeInBytes());
    }

    VideoCore::RasterizerInterface& rasterizer;

    IntervalMap registered_surfaces;

    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
    /// destroyed when used with different surface parameters.
    std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve;
};
585
586} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h
new file mode 100644
index 000000000..9c21a0649
--- /dev/null
+++ b/src/video_core/texture_cache/copy_params.h
@@ -0,0 +1,36 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
/// Describes a 3D region copy between two mipmap levels: a source origin, a destination
/// origin, the levels involved, and the extent to copy.
struct CopyParams {
    /// Full constructor: explicit source/destination origins, levels, and copy extent.
    constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y,
                         u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height,
                         u32 depth)
        : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x},
          dest_y{dest_y}, dest_z{dest_z}, source_level{source_level},
          dest_level{dest_level}, width{width}, height{height}, depth{depth} {}

    /// Convenience constructor for a whole-level copy: both origins at (0, 0, 0) and the same
    /// level on both sides.
    constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level)
        : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level},
          dest_level{level}, width{width}, height{height}, depth{depth} {}

    u32 source_x;
    u32 source_y;
    u32 source_z;
    u32 dest_x;
    u32 dest_y;
    u32 dest_z;
    u32 source_level;
    u32 dest_level;
    u32 width;
    u32 height;
    u32 depth;
};
35
36} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
new file mode 100644
index 000000000..683c49207
--- /dev/null
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -0,0 +1,302 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/microprofile.h"
8#include "video_core/memory_manager.h"
9#include "video_core/texture_cache/surface_base.h"
10#include "video_core/texture_cache/surface_params.h"
11#include "video_core/textures/convert.h"
12
13namespace VideoCommon {
14
15MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128));
16MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128));
17
18using Tegra::Texture::ConvertFromGuestToHost;
19using VideoCore::MortonSwizzleMode;
20using VideoCore::Surface::SurfaceCompression;
21
22StagingCache::StagingCache() = default;
23
24StagingCache::~StagingCache() = default;
25
// Precomputes per-level guest sizes/offsets and the total guest footprint of
// the surface described by `params`, starting at `gpu_addr`.
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
    : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr},
      mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) {
    // Mipmap levels are laid out consecutively in guest memory; record the
    // size and running offset of each one.
    std::size_t offset = 0;
    for (u32 level = 0; level < params.num_levels; ++level) {
        const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
        mipmap_sizes[level] = mipmap_size;
        mipmap_offsets[level] = offset;
        offset += mipmap_size;
    }
    // After the loop, `offset` is the size of one full mip chain (= one layer).
    layer_size = offset;
    if (params.is_layered) {
        if (params.is_tiled) {
            // Tiled layered surfaces align each layer to the block configuration.
            layer_size =
                SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
        }
        // For layered targets, `depth` is the number of layers.
        guest_memory_size = layer_size * params.depth;
    } else {
        guest_memory_size = layer_size;
    }
}
47
48MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
49 const u32 src_bpp{params.GetBytesPerPixel()};
50 const u32 dst_bpp{rhs.GetBytesPerPixel()};
51 const bool ib1 = params.IsBuffer();
52 const bool ib2 = rhs.IsBuffer();
53 if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) {
54 const bool cb1 = params.IsCompressed();
55 const bool cb2 = rhs.IsCompressed();
56 if (cb1 == cb2) {
57 return MatchTopologyResult::FullMatch;
58 }
59 return MatchTopologyResult::CompressUnmatch;
60 }
61 return MatchTopologyResult::None;
62}
63
// Compares the structural layout of this surface against `rhs`.
// Buffers match on byte width alone; linear surfaces on pitch/height (width
// may differ for a SemiMatch); tiled surfaces on the full block configuration,
// with a format-converted width/height comparison allowing a SemiMatch.
MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
    // Buffer surface Check
    if (params.IsBuffer()) {
        const std::size_t wd1 = params.width * params.GetBytesPerPixel();
        const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
        if (wd1 == wd2) {
            return MatchStructureResult::FullMatch;
        }
        return MatchStructureResult::None;
    }

    // Linear Surface check
    if (!params.is_tiled) {
        if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
            if (params.width == rhs.width) {
                return MatchStructureResult::FullMatch;
            } else {
                // Same row layout but different logical width: usable, but
                // only as a partial (semi) match.
                return MatchStructureResult::SemiMatch;
            }
        }
        return MatchStructureResult::None;
    }

    // Tiled Surface check
    if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
                 params.tile_width_spacing, params.num_levels) ==
        std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
                 rhs.tile_width_spacing, rhs.num_levels)) {
        if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
            return MatchStructureResult::FullMatch;
        }
        // Convert rhs dimensions into this surface's pixel format before
        // comparing; equal converted extents still allow a semi match.
        const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
                                                   rhs.pixel_format);
        const u32 hs =
            SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
        const u32 w1 = params.GetBlockAlignedWidth();
        if (std::tie(w1, params.height) == std::tie(ws, hs)) {
            return MatchStructureResult::SemiMatch;
        }
    }
    return MatchStructureResult::None;
}
106
// Translates a GPU address inside this surface into a (layer, level) pair.
// Returns an empty optional when the address precedes the surface or does not
// land exactly on a recorded mipmap offset.
std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
    const GPUVAddr candidate_gpu_addr) const {
    if (gpu_addr == candidate_gpu_addr) {
        // Fast path: the surface's own base address is layer 0, level 0.
        return {{0, 0}};
    }
    if (candidate_gpu_addr < gpu_addr) {
        return {};
    }
    const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
    const auto layer{static_cast<u32>(relative_address / layer_size)};
    const GPUVAddr mipmap_address = relative_address - layer_size * layer;
    // mipmap_offsets is sorted ascending (built by the constructor), so a
    // binary search locates an exact level start.
    const auto mipmap_it =
        Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
    if (mipmap_it == mipmap_offsets.end()) {
        return {};
    }
    const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
    return std::make_pair(layer, level);
}
126
// Produces one copy descriptor per (layer, level) for a layered surface,
// clamping each copy's extent to the smaller of the two surfaces.
std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const {
    const u32 layers{params.depth};
    const u32 mipmaps{params.num_levels};
    std::vector<CopyParams> result;
    result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps));

    for (u32 layer = 0; layer < layers; layer++) {
        for (u32 level = 0; level < mipmaps; level++) {
            const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
            const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
            // NOTE(review): `layer` is passed through the CopyParams `depth`
            // slot (4-arg ctor is width/height/depth/level) — downstream copy
            // code appears to interpret depth as the layer for layered
            // surfaces; confirm against the consumers of CopyParams.
            result.emplace_back(width, height, layer, level);
        }
    }
    return result;
}
142
143std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const {
144 const u32 mipmaps{params.num_levels};
145 std::vector<CopyParams> result;
146 result.reserve(mipmaps);
147
148 for (u32 level = 0; level < mipmaps; level++) {
149 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
150 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
151 const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))};
152 result.emplace_back(width, height, depth, level);
153 }
154 return result;
155}
156
// Swizzles or deswizzles one mipmap level between guest (block-linear) memory
// and a linear host buffer. `mode` selects the direction (MortonToLinear on
// load, LinearToMorton on flush). `memory` points at guest data, `buffer` at
// the host-side staging data.
void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params,
                                  u8* buffer, u32 level) {
    const u32 width{params.GetMipWidth(level)};
    const u32 height{params.GetMipHeight(level)};
    const u32 block_height{params.GetMipBlockHeight(level)};
    const u32 block_depth{params.GetMipBlockDepth(level)};

    std::size_t guest_offset{mipmap_offsets[level]};
    if (params.is_layered) {
        // Layered surfaces swizzle each layer independently with depth 1,
        // stepping by the aligned guest layer stride and the host level size.
        std::size_t host_offset{0};
        const std::size_t guest_stride = layer_size;
        const std::size_t host_stride = params.GetHostLayerSize(level);
        for (u32 layer = 0; layer < params.depth; ++layer) {
            MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1,
                          params.tile_width_spacing, buffer + host_offset, memory + guest_offset);
            guest_offset += guest_stride;
            host_offset += host_stride;
        }
    } else {
        // Non-layered surfaces swizzle the whole level (including its depth)
        // in a single call.
        MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth,
                      params.GetMipDepth(level), params.tile_width_spacing, buffer,
                      memory + guest_offset);
    }
}
181
// Reads the surface's guest data into staging buffer 0, deswizzling tiled
// data and performing any required guest-to-host format conversion.
void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
                                 StagingCache& staging_cache) {
    MICROPROFILE_SCOPE(GPU_Load_Texture);
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;
    is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);

    // Handle continuity
    if (is_continuous) {
        // Use physical memory directly
        host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            return;
        }
    } else {
        // Use an extra temporary buffer (staging buffer 1) and gather the
        // scattered guest pages into it first.
        auto& tmp_buffer = staging_cache.GetBuffer(1);
        tmp_buffer.resize(guest_memory_size);
        host_ptr = tmp_buffer.data();
        memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
    }

    if (params.is_tiled) {
        // NOTE(review): asserts the block width shift is always 0 for tiled
        // textures — confirm this invariant holds for all texture targets.
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
                   params.block_width, static_cast<u32>(params.target));
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
            SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }
    } else {
        ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
        const u32 bpp{params.GetBytesPerPixel()};
        const u32 block_width{params.GetDefaultBlockWidth()};
        const u32 block_height{params.GetDefaultBlockHeight()};
        const u32 width{(params.width + block_width - 1) / block_width};
        const u32 height{(params.height + block_height - 1) / block_height};
        const u32 copy_size{width * bpp};
        if (params.pitch == copy_size) {
            // Pitch matches the row size: one bulk copy suffices.
            std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes());
        } else {
            // Row-by-row copy, skipping the padding bytes at the end of each
            // guest pitch.
            const u8* start{host_ptr};
            u8* write_to{staging_buffer.data()};
            for (u32 h = height; h > 0; --h) {
                std::memcpy(write_to, start, copy_size);
                start += params.pitch;
                write_to += copy_size;
            }
        }
    }

    // Formats the host can consume directly need no further conversion.
    auto compression_type = params.GetCompressionType();
    if (compression_type == SurfaceCompression::None ||
        compression_type == SurfaceCompression::Compressed)
        return;

    // Convert in place (Rearranged) or into the converted layout, iterating
    // from the last level down so converted data never overwrites unread input.
    for (u32 level_up = params.num_levels; level_up > 0; --level_up) {
        const u32 level = level_up - 1;
        const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)};
        const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged
                                                ? in_host_offset
                                                : params.GetConvertedMipmapOffset(level);
        u8* in_buffer = staging_buffer.data() + in_host_offset;
        u8* out_buffer = staging_buffer.data() + out_host_offset;
        ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
                               params.GetMipWidth(level), params.GetMipHeight(level),
                               params.GetMipDepth(level), true, true);
    }
}
251
// Writes staging buffer 0 back to guest memory, reswizzling tiled data. When
// guest memory is continuous the swizzle writes directly into it; otherwise a
// temporary buffer is filled and written back at the end.
void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
                                  StagingCache& staging_cache) {
    MICROPROFILE_SCOPE(GPU_Flush_Texture);
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;

    // Handle continuity
    if (is_continuous) {
        // Use physical memory directly
        host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            return;
        }
    } else {
        // Use an extra temporary buffer
        auto& tmp_buffer = staging_cache.GetBuffer(1);
        tmp_buffer.resize(guest_memory_size);
        host_ptr = tmp_buffer.data();
    }

    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
            SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }
    } else {
        // Only single-level 2D linear flushes are supported.
        ASSERT(params.target == SurfaceTarget::Texture2D);
        ASSERT(params.num_levels == 1);

        const u32 bpp{params.GetBytesPerPixel()};
        const u32 copy_size{params.width * bpp};
        if (params.pitch == copy_size) {
            std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
        } else {
            // Row-by-row copy, re-inserting the pitch padding on each row.
            u8* start{host_ptr};
            const u8* read_to{staging_buffer.data()};
            for (u32 h = params.height; h > 0; --h) {
                std::memcpy(start, read_to, copy_size);
                start += params.pitch;
                read_to += copy_size;
            }
        }
    }
    // Scattered guest memory: push the temporary buffer back out.
    if (!is_continuous) {
        memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
    }
}
301
302} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
new file mode 100644
index 000000000..5e497e49f
--- /dev/null
+++ b/src/video_core/texture_cache/surface_base.h
@@ -0,0 +1,325 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <unordered_map>
9#include <vector>
10
11#include "common/assert.h"
12#include "common/binary_find.h"
13#include "common/common_types.h"
14#include "video_core/gpu.h"
15#include "video_core/morton.h"
16#include "video_core/texture_cache/copy_params.h"
17#include "video_core/texture_cache/surface_params.h"
18#include "video_core/texture_cache/surface_view.h"
19
20namespace Tegra {
21class MemoryManager;
22}
23
24namespace VideoCommon {
25
26using VideoCore::MortonSwizzleMode;
27using VideoCore::Surface::SurfaceTarget;
28
/// Result of comparing the structural layout of two surfaces.
enum class MatchStructureResult : u32 {
    FullMatch = 0, ///< Layouts are identical.
    SemiMatch = 1, ///< Layouts are compatible enough for partial reuse.
    None = 2,      ///< Layouts are incompatible.
};

/// Result of comparing the memory topology (bpp/tiling/buffer-ness) of two
/// surfaces.
enum class MatchTopologyResult : u32 {
    FullMatch = 0,       ///< Same topology and compression scheme.
    CompressUnmatch = 1, ///< Same topology, differing compression scheme.
    None = 2,            ///< Different topology.
};
40
/// Owns a set of reusable byte buffers used as staging storage for texture
/// uploads/downloads. Index 0 is the main staging buffer; index 1 is used as
/// a temporary for non-continuous guest memory (see SurfaceBaseImpl).
class StagingCache {
public:
    explicit StagingCache();
    ~StagingCache();

    /// Returns the buffer at `index`; no bounds checking is performed, so
    /// SetSize must have been called with a sufficient size first.
    std::vector<u8>& GetBuffer(std::size_t index) {
        return staging_buffer[index];
    }

    const std::vector<u8>& GetBuffer(std::size_t index) const {
        return staging_buffer[index];
    }

    /// Sets the number of available staging buffers.
    void SetSize(std::size_t size) {
        staging_buffer.resize(size);
    }

private:
    std::vector<std::vector<u8>> staging_buffer;
};
61
/// Non-templated base for cached surfaces: tracks the surface's guest/CPU/
/// cache addresses, per-level sizes and offsets, and implements guest<->host
/// data transfer (LoadBuffer/FlushBuffer).
class SurfaceBaseImpl {
public:
    /// Reads guest data into the staging cache, deswizzling/converting it.
    void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);

    /// Writes staging data back to guest memory, reswizzling it.
    void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);

    GPUVAddr GetGpuAddr() const {
        return gpu_addr;
    }

    /// Returns true when [start, end) intersects this surface's cache range.
    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
        return (cache_addr < end) && (cache_addr_end > start);
    }

    /// Returns true when [other_start, other_end] lies fully inside this
    /// surface's guest address range.
    bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
        const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
        return (gpu_addr <= other_start && other_end <= gpu_addr_end);
    }

    // Use only when recycling a surface
    void SetGpuAddr(const GPUVAddr new_addr) {
        gpu_addr = new_addr;
    }

    VAddr GetCpuAddr() const {
        return cpu_addr;
    }

    void SetCpuAddr(const VAddr new_addr) {
        cpu_addr = new_addr;
    }

    CacheAddr GetCacheAddr() const {
        return cache_addr;
    }

    CacheAddr GetCacheAddrEnd() const {
        return cache_addr_end;
    }

    /// Sets the cache address and derives the end from the guest size.
    void SetCacheAddr(const CacheAddr new_addr) {
        cache_addr = new_addr;
        cache_addr_end = new_addr + guest_memory_size;
    }

    const SurfaceParams& GetSurfaceParams() const {
        return params;
    }

    /// Size of the surface in guest memory (including tiling alignment).
    std::size_t GetSizeInBytes() const {
        return guest_memory_size;
    }

    /// Size of the surface's linear host representation.
    std::size_t GetHostSizeInBytes() const {
        return host_memory_size;
    }

    std::size_t GetMipmapSize(const u32 level) const {
        return mipmap_sizes[level];
    }

    void MarkAsContinuous(const bool is_continuous) {
        this->is_continuous = is_continuous;
    }

    /// Whether the guest backing memory is physically continuous.
    bool IsContinuous() const {
        return is_continuous;
    }

    bool IsLinear() const {
        return !params.is_tiled;
    }

    bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
        return params.pixel_format == pixel_format;
    }

    VideoCore::Surface::PixelFormat GetFormat() const {
        return params.pixel_format;
    }

    bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const {
        return params.target == target;
    }

    MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const;

    MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const;

    /// True when both surfaces are single-level 2D textures at the same
    /// address with the same target/level count.
    bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const {
        return std::tie(gpu_addr, params.target, params.num_levels) ==
                   std::tie(other_gpu_addr, rhs.target, rhs.num_levels) &&
               params.target == SurfaceTarget::Texture2D && params.num_levels == 1;
    }

    /// Maps a GPU address within the surface to its (layer, level), if any.
    std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const;

    /// Splits the surface into per-(layer,)level copy descriptors against
    /// `in_params`.
    std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const {
        return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params);
    }

protected:
    explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params);
    ~SurfaceBaseImpl() = default;

    /// Backend hook to label the surface for debugging tools.
    virtual void DecorateSurfaceName() = 0;

    const SurfaceParams params;
    std::size_t layer_size;        // Guest size of one layer (aligned if tiled).
    std::size_t guest_memory_size; // Total guest footprint.
    const std::size_t host_memory_size;
    GPUVAddr gpu_addr{};
    CacheAddr cache_addr{};
    CacheAddr cache_addr_end{};
    VAddr cpu_addr{};
    bool is_continuous{};

    std::vector<std::size_t> mipmap_sizes;   // Guest size per level.
    std::vector<std::size_t> mipmap_offsets; // Guest offset per level (sorted).

private:
    void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer,
                     u32 level);

    std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const;

    std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
};
190
/// Templated surface base adding state tracking (modified/render-target/
/// registered flags) and view management on top of SurfaceBaseImpl. TView is
/// the backend's view type; views are cached per ViewParams.
template <typename TView>
class SurfaceBase : public SurfaceBaseImpl {
public:
    virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;

    virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;

    /// Render targets are always considered modified regardless of the flag.
    void MarkAsModified(bool is_modified_, u64 tick) {
        is_modified = is_modified_ || is_target;
        modification_tick = tick;
    }

    void MarkAsRenderTarget(bool is_target_, u32 index_) {
        is_target = is_target_;
        index = index_;
    }

    void MarkAsPicked(bool is_picked_) {
        is_picked = is_picked_;
    }

    bool IsModified() const {
        return is_modified;
    }

    bool IsProtected() const {
        // Only 3D Slices are to be protected
        return is_target && params.block_depth > 0;
    }

    bool IsRenderTarget() const {
        return is_target;
    }

    /// Render-target slot index; NO_RT when not a render target.
    u32 GetRenderTarget() const {
        return index;
    }

    bool IsRegistered() const {
        return is_registered;
    }

    bool IsPicked() const {
        return is_picked;
    }

    void MarkAsRegistered(bool is_reg) {
        is_registered = is_reg;
    }

    u64 GetModificationTick() const {
        return modification_tick;
    }

    /// Builds a view covering the entire surface, collapsing layers when the
    /// requested overview is not layered but this surface is.
    TView EmplaceOverview(const SurfaceParams& overview_params) {
        const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
        return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
    }

    /// Builds a view whose size does not match a whole mipmap: spans either
    /// several layers of level 0 or several levels of one layer.
    std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
                                              const GPUVAddr view_addr,
                                              const std::size_t candidate_size, const u32 mipmap,
                                              const u32 layer) {
        const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
        if (!layer_mipmap) {
            return {};
        }
        const u32 end_layer{layer_mipmap->first};
        const u32 end_mipmap{layer_mipmap->second};
        if (layer != end_layer) {
            // Multi-layer span: only representable when it covers level 0.
            if (mipmap == 0 && end_mipmap == 0) {
                return GetView(ViewParams(view_params.target, layer, end_layer - layer + 1, 0, 1));
            }
            return {};
        } else {
            // Single-layer span across a contiguous run of levels.
            return GetView(
                ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1));
        }
    }

    /// Attempts to build a view at `view_addr` of `candidate_size` bytes.
    /// 3D targets and trivially-viewless surfaces are rejected up front.
    std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
                                     const std::size_t candidate_size) {
        if (params.target == SurfaceTarget::Texture3D ||
            (params.num_levels == 1 && !params.is_layered) ||
            view_params.target == SurfaceTarget::Texture3D) {
            return {};
        }
        const auto layer_mipmap{GetLayerMipmap(view_addr)};
        if (!layer_mipmap) {
            return {};
        }
        const u32 layer{layer_mipmap->first};
        const u32 mipmap{layer_mipmap->second};
        if (GetMipmapSize(mipmap) != candidate_size) {
            return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
        }
        return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
    }

    TView GetMainView() const {
        return main_view;
    }

protected:
    explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params)
        : SurfaceBaseImpl(gpu_addr, params) {}

    ~SurfaceBase() = default;

    /// Backend hook that materializes a view for the given parameters.
    virtual TView CreateView(const ViewParams& view_key) = 0;

    TView main_view;
    std::unordered_map<ViewParams, TView> views;

private:
    /// Returns a cached view for `key`, creating it on first use.
    TView GetView(const ViewParams& key) {
        const auto [entry, is_cache_miss] = views.try_emplace(key);
        auto& view{entry->second};
        if (is_cache_miss) {
            view = CreateView(key);
        }
        return view;
    }

    static constexpr u32 NO_RT = 0xFFFFFFFF;

    bool is_modified{};
    bool is_target{};
    bool is_registered{};
    bool is_picked{};
    u32 index{NO_RT};
    u64 modification_tick{};
};
324
325} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
new file mode 100644
index 000000000..1e4d3fb79
--- /dev/null
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -0,0 +1,389 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6
7#include "common/alignment.h"
8#include "common/bit_util.h"
9#include "core/core.h"
10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/surface.h"
12#include "video_core/texture_cache/surface_params.h"
13
14namespace VideoCommon {
15
16using VideoCore::Surface::ComponentTypeFromDepthFormat;
17using VideoCore::Surface::ComponentTypeFromRenderTarget;
18using VideoCore::Surface::ComponentTypeFromTexture;
19using VideoCore::Surface::PixelFormat;
20using VideoCore::Surface::PixelFormatFromDepthFormat;
21using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
22using VideoCore::Surface::PixelFormatFromTextureFormat;
23using VideoCore::Surface::SurfaceTarget;
24using VideoCore::Surface::SurfaceTargetFromTextureType;
25using VideoCore::Surface::SurfaceType;
26
27namespace {
28
/// Maps a shader sampler texture type (plus its array flag) to the cache's
/// SurfaceTarget enumeration.
SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
    switch (type) {
    case Tegra::Shader::TextureType::Texture1D:
        return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D;
    case Tegra::Shader::TextureType::Texture2D:
        return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
    case Tegra::Shader::TextureType::Texture3D:
        // 3D texture arrays do not exist.
        ASSERT(!is_array);
        return SurfaceTarget::Texture3D;
    case Tegra::Shader::TextureType::TextureCube:
        return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap;
    default:
        UNREACHABLE();
        return SurfaceTarget::Texture2D;
    }
}
45
/// Maps a shader image type to the cache's SurfaceTarget enumeration.
SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) {
    switch (type) {
    case Tegra::Shader::ImageType::Texture1D:
        return SurfaceTarget::Texture1D;
    case Tegra::Shader::ImageType::TextureBuffer:
        return SurfaceTarget::TextureBuffer;
    case Tegra::Shader::ImageType::Texture1DArray:
        return SurfaceTarget::Texture1DArray;
    case Tegra::Shader::ImageType::Texture2D:
        return SurfaceTarget::Texture2D;
    case Tegra::Shader::ImageType::Texture2DArray:
        return SurfaceTarget::Texture2DArray;
    case Tegra::Shader::ImageType::Texture3D:
        return SurfaceTarget::Texture3D;
    default:
        UNREACHABLE();
        return SurfaceTarget::Texture2D;
    }
}
65
66constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
67 return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
68}
69
70} // Anonymous namespace
71
72SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& tic,
73 const VideoCommon::Shader::Sampler& entry) {
74 SurfaceParams params;
75 params.is_tiled = tic.IsTiled();
76 params.srgb_conversion = tic.IsSrgbConversionEnabled();
77 params.block_width = params.is_tiled ? tic.BlockWidth() : 0,
78 params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
79 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
80 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
81 params.pixel_format =
82 PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion);
83 params.type = GetFormatType(params.pixel_format);
84 if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {
85 switch (params.pixel_format) {
86 case PixelFormat::R16U:
87 case PixelFormat::R16F: {
88 params.pixel_format = PixelFormat::Z16;
89 break;
90 }
91 case PixelFormat::R32F: {
92 params.pixel_format = PixelFormat::Z32F;
93 break;
94 }
95 default: {
96 UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
97 static_cast<u32>(params.pixel_format));
98 }
99 }
100 params.type = GetFormatType(params.pixel_format);
101 }
102 params.component_type = ComponentTypeFromTexture(tic.r_type.Value());
103 params.type = GetFormatType(params.pixel_format);
104 // TODO: on 1DBuffer we should use the tic info.
105 if (tic.IsBuffer()) {
106 params.target = SurfaceTarget::TextureBuffer;
107 params.width = tic.Width();
108 params.pitch = params.width * params.GetBytesPerPixel();
109 params.height = 1;
110 params.depth = 1;
111 params.num_levels = 1;
112 params.emulated_levels = 1;
113 params.is_layered = false;
114 } else {
115 params.target = TextureTypeToSurfaceTarget(entry.GetType(), entry.IsArray());
116 params.width = tic.Width();
117 params.height = tic.Height();
118 params.depth = tic.Depth();
119 params.pitch = params.is_tiled ? 0 : tic.Pitch();
120 if (params.target == SurfaceTarget::TextureCubemap ||
121 params.target == SurfaceTarget::TextureCubeArray) {
122 params.depth *= 6;
123 }
124 params.num_levels = tic.max_mip_level + 1;
125 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
126 params.is_layered = params.IsLayered();
127 }
128 return params;
129}
130
131SurfaceParams SurfaceParams::CreateForImage(const Tegra::Texture::TICEntry& tic,
132 const VideoCommon::Shader::Image& entry) {
133 SurfaceParams params;
134 params.is_tiled = tic.IsTiled();
135 params.srgb_conversion = tic.IsSrgbConversionEnabled();
136 params.block_width = params.is_tiled ? tic.BlockWidth() : 0,
137 params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
138 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
139 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
140 params.pixel_format =
141 PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion);
142 params.type = GetFormatType(params.pixel_format);
143 params.component_type = ComponentTypeFromTexture(tic.r_type.Value());
144 params.type = GetFormatType(params.pixel_format);
145 params.target = ImageTypeToSurfaceTarget(entry.GetType());
146 // TODO: on 1DBuffer we should use the tic info.
147 if (tic.IsBuffer()) {
148 params.target = SurfaceTarget::TextureBuffer;
149 params.width = tic.Width();
150 params.pitch = params.width * params.GetBytesPerPixel();
151 params.height = 1;
152 params.depth = 1;
153 params.num_levels = 1;
154 params.emulated_levels = 1;
155 params.is_layered = false;
156 } else {
157 params.width = tic.Width();
158 params.height = tic.Height();
159 params.depth = tic.Depth();
160 params.pitch = params.is_tiled ? 0 : tic.Pitch();
161 if (params.target == SurfaceTarget::TextureCubemap ||
162 params.target == SurfaceTarget::TextureCubeArray) {
163 params.depth *= 6;
164 }
165 params.num_levels = tic.max_mip_level + 1;
166 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
167 params.is_layered = params.IsLayered();
168 }
169 return params;
170}
171
/// Builds SurfaceParams for a bound depth (zeta) buffer.
/// NOTE(review): `system` is accepted but not read here — presumably kept for
/// signature parity with CreateForFramebuffer; confirm before removing.
SurfaceParams SurfaceParams::CreateForDepthBuffer(
    Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
    u32 block_width, u32 block_height, u32 block_depth,
    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
    SurfaceParams params;
    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.srgb_conversion = false;
    // Block dimensions are log2 shifts clamped to the hardware maximum of 5.
    params.block_width = std::min(block_width, 5U);
    params.block_height = std::min(block_height, 5U);
    params.block_depth = std::min(block_depth, 5U);
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromDepthFormat(format);
    params.component_type = ComponentTypeFromDepthFormat(format);
    params.type = GetFormatType(params.pixel_format);
    params.width = zeta_width;
    params.height = zeta_height;
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
    params.pitch = 0;
    params.num_levels = 1;
    params.emulated_levels = 1;
    params.is_layered = false;
    return params;
}
196
/// Builds SurfaceParams for render target `index` from the current Maxwell3D
/// register state.
SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
    const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
    SurfaceParams params;
    params.is_tiled =
        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
    params.block_width = config.memory_layout.block_width;
    params.block_height = config.memory_layout.block_height;
    params.block_depth = config.memory_layout.block_depth;
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    if (params.is_tiled) {
        params.pitch = 0;
        params.width = config.width;
    } else {
        // Pitch-linear render targets encode the pitch in bytes in the width
        // register; derive the pixel width from it.
        const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
        params.pitch = config.width;
        params.width = params.pitch / bpp;
    }
    params.height = config.height;
    params.depth = 1;
    params.target = SurfaceTarget::Texture2D;
    params.num_levels = 1;
    params.emulated_levels = 1;
    params.is_layered = false;
    return params;
}
227
228SurfaceParams SurfaceParams::CreateForFermiCopySurface(
229 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
230 SurfaceParams params{};
231 params.is_tiled = !config.linear;
232 params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
233 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
234 params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0,
235 params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0,
236 params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0,
237 params.tile_width_spacing = 1;
238 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
239 params.component_type = ComponentTypeFromRenderTarget(config.format);
240 params.type = GetFormatType(params.pixel_format);
241 params.width = config.width;
242 params.height = config.height;
243 params.pitch = config.pitch;
244 // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
245 params.target = SurfaceTarget::Texture2D;
246 params.depth = 1;
247 params.num_levels = 1;
248 params.emulated_levels = 1;
249 params.is_layered = params.IsLayered();
250 return params;
251}
252
253bool SurfaceParams::IsLayered() const {
254 switch (target) {
255 case SurfaceTarget::Texture1DArray:
256 case SurfaceTarget::Texture2DArray:
257 case SurfaceTarget::TextureCubemap:
258 case SurfaceTarget::TextureCubeArray:
259 return true;
260 default:
261 return false;
262 }
263}
264
// Auto block resizing algorithm from:
// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
// Returns the block height (as a log2 shift) to use for `level`: level 0 uses
// the configured value, smaller levels shrink it to fit the mip's row count.
u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
    if (level == 0) {
        return this->block_height;
    }

    const u32 height_new{GetMipHeight(level)};
    const u32 default_block_height{GetDefaultBlockHeight()};
    const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
    const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
    // Clamp to the valid shift range [0, 4] (stored biased by 3 here).
    return std::clamp(block_height_new, 3U, 7U) - 3U;
}
278
// Returns the block depth (as a log2 shift) for `level`. Layered surfaces
// always use 0 (each layer is depth 1); otherwise shrink with the mip depth,
// following the same mesa-derived scheme as GetMipBlockHeight.
u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
    if (level == 0) {
        return this->block_depth;
    }
    if (is_layered) {
        return 0;
    }

    const u32 depth_new{GetMipDepth(level)};
    const u32 block_depth_new = Common::Log2Ceil32(depth_new);
    if (block_depth_new > 4) {
        // Cap at 32 (shift 5), dropping to 16 when block height is >= 4 GOBs.
        return 5 - (GetMipBlockHeight(level) >= 2);
    }
    return block_depth_new;
}
294
295std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
296 std::size_t offset = 0;
297 for (u32 i = 0; i < level; i++) {
298 offset += GetInnerMipmapMemorySize(i, false, false);
299 }
300 return offset;
301}
302
303std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
304 std::size_t offset = 0;
305 for (u32 i = 0; i < level; i++) {
306 offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
307 }
308 return offset;
309}
310
311std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const {
312 std::size_t offset = 0;
313 for (u32 i = 0; i < level; i++) {
314 offset += GetConvertedMipmapSize(i);
315 }
316 return offset;
317}
318
// Size in bytes of `level` after conversion to an uncompressed 4-bytes-per-
// pixel format. Layered surfaces use the full layer count (depth does not
// shrink with the mip level for them).
std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
    constexpr std::size_t rgba8_bpp = 4ULL;
    const std::size_t width_t = GetMipWidth(level);
    const std::size_t height_t = GetMipHeight(level);
    const std::size_t depth_t = is_layered ? depth : GetMipDepth(level);
    return width_t * height_t * depth_t * rgba8_bpp;
}
326
// Size in bytes of one layer (the whole mip chain), optionally measured in
// host layout and/or with uncompressed extents. Tiled layered surfaces align
// the result to the GOB/block configuration.
std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
    std::size_t size = 0;
    for (u32 level = 0; level < num_levels; ++level) {
        size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
    }
    if (is_tiled && is_layered) {
        return Common::AlignBits(size,
                                 Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
    }
    return size;
}
338
// Size in bytes of a single mipmap level (one layer's worth). `as_host_size`
// selects linear host layout over the guest tiled layout; `uncompressed`
// measures extents in pixels rather than compression blocks.
std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
                                                    bool uncompressed) const {
    const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
    const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
    const u32 depth{is_layered ? 1U : GetMipDepth(level)};
    if (is_tiled) {
        return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
                                             depth, GetMipBlockHeight(level),
                                             GetMipBlockDepth(level));
    } else if (as_host_size || IsBuffer()) {
        return GetBytesPerPixel() * width * height * depth;
    } else {
        // Linear Texture Case
        return pitch * height * depth;
    }
}
355
// Field-wise equality over the layout-defining members.
// NOTE(review): srgb_conversion, emulated_levels and is_layered are not
// compared — presumably derived from the compared fields; confirm that no
// two unequal configurations can compare equal here.
bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
    return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
                    height, depth, pitch, num_levels, pixel_format, component_type, type, target) ==
           std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
                    rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
                    rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target);
}
363
// Short human-readable name of the surface target, used when labeling
// surfaces for debugging.
std::string SurfaceParams::TargetName() const {
    switch (target) {
    case SurfaceTarget::Texture1D:
        return "1D";
    case SurfaceTarget::TextureBuffer:
        return "TexBuffer";
    case SurfaceTarget::Texture2D:
        return "2D";
    case SurfaceTarget::Texture3D:
        return "3D";
    case SurfaceTarget::Texture1DArray:
        return "1DArray";
    case SurfaceTarget::Texture2DArray:
        return "2DArray";
    case SurfaceTarget::TextureCubemap:
        return "Cube";
    case SurfaceTarget::TextureCubeArray:
        return "CubeArray";
    default:
        LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
        UNREACHABLE();
        // "Target UnKnown" fallback name embedding the raw enum value.
        return fmt::format("TUK({})", static_cast<u32>(target));
    }
}
388
389} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
new file mode 100644
index 000000000..c58e7f8a4
--- /dev/null
+++ b/src/video_core/texture_cache/surface_params.h
@@ -0,0 +1,286 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/alignment.h"
8#include "common/bit_util.h"
9#include "common/cityhash.h"
10#include "common/common_types.h"
11#include "video_core/engines/fermi_2d.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/shader/shader_ir.h"
14#include "video_core/surface.h"
15#include "video_core/textures/decoders.h"
16
17namespace VideoCommon {
18
19using VideoCore::Surface::SurfaceCompression;
20
/// Fully describes a guest GPU surface: dimensions, tiling layout, pixel
/// format and target. Hashable and comparable so it can be used as the key
/// type of the texture cache's containers.
class SurfaceParams {
public:
    /// Creates SurfaceCachedParams from a texture configuration.
    static SurfaceParams CreateForTexture(const Tegra::Texture::TICEntry& tic,
                                          const VideoCommon::Shader::Sampler& entry);

    /// Creates SurfaceCachedParams from an image configuration.
    static SurfaceParams CreateForImage(const Tegra::Texture::TICEntry& tic,
                                        const VideoCommon::Shader::Image& entry);

    /// Creates SurfaceCachedParams for a depth buffer configuration.
    static SurfaceParams CreateForDepthBuffer(
        Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
        u32 block_width, u32 block_height, u32 block_depth,
        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);

    /// Creates SurfaceCachedParams from a framebuffer configuration.
    static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);

    /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
    static SurfaceParams CreateForFermiCopySurface(
        const Tegra::Engines::Fermi2D::Regs::Surface& config);

    /// Hashes the raw bytes of the object.
    /// NOTE(review): this also hashes any struct padding bytes, so it relies on
    /// instances being zero-initialized before use; it additionally hashes
    /// fields operator== does not compare (srgb_conversion, is_layered,
    /// emulated_levels) — confirm those are always derived from the compared
    /// fields, otherwise equal keys could hash differently.
    std::size_t Hash() const {
        return static_cast<std::size_t>(
            Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
    }

    bool operator==(const SurfaceParams& rhs) const;

    bool operator!=(const SurfaceParams& rhs) const {
        return !operator==(rhs);
    }

    /// Returns the total size of the surface in guest memory (all layers and levels).
    std::size_t GetGuestSizeInBytes() const {
        return GetInnerMemorySize(false, false, false);
    }

    /// Returns the total size of the surface in host memory (linear layout).
    std::size_t GetHostSizeInBytes() const {
        std::size_t host_size_in_bytes;
        if (GetCompressionType() == SurfaceCompression::Converted) {
            // ASTC is decompressed in software and emulated as RGBA8
            host_size_in_bytes = 0;
            for (u32 level = 0; level < num_levels; ++level) {
                host_size_in_bytes += GetConvertedMipmapSize(level);
            }
        } else {
            host_size_in_bytes = GetInnerMemorySize(true, false, false);
        }
        return host_size_in_bytes;
    }

    /// Returns the width aligned up so a row spans a whole number of 64-byte units.
    u32 GetBlockAlignedWidth() const {
        return Common::AlignUp(width, 64 / GetBytesPerPixel());
    }

    /// Returns the width of a given mipmap level.
    u32 GetMipWidth(u32 level) const {
        return std::max(1U, width >> level);
    }

    /// Returns the height of a given mipmap level.
    u32 GetMipHeight(u32 level) const {
        return std::max(1U, height >> level);
    }

    /// Returns the depth of a given mipmap level. For layered surfaces, depth
    /// holds the layer count and does not shrink per level.
    u32 GetMipDepth(u32 level) const {
        return is_layered ? depth : std::max(1U, depth >> level);
    }

    /// Returns the block height of a given mipmap level.
    u32 GetMipBlockHeight(u32 level) const;

    /// Returns the block depth of a given mipmap level.
    u32 GetMipBlockDepth(u32 level) const;

    /// Returns the best possible row/pitch alignment for the surface.
    u32 GetRowAlignment(u32 level) const {
        // Converted (ASTC) surfaces are uploaded as RGBA8, hence 4 bytes per pixel.
        const u32 bpp =
            GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel();
        return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
    }

    /// Returns the offset in bytes in guest memory of a given mipmap level.
    std::size_t GetGuestMipmapLevelOffset(u32 level) const;

    /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
    std::size_t GetHostMipmapLevelOffset(u32 level) const;

    /// Returns the offset in bytes in host memory (linear) of a given mipmap level
    /// for a texture that is converted in host gpu.
    std::size_t GetConvertedMipmapOffset(u32 level) const;

    /// Returns the size in bytes in guest memory of a given mipmap level.
    std::size_t GetGuestMipmapSize(u32 level) const {
        return GetInnerMipmapMemorySize(level, false, false);
    }

    /// Returns the size in bytes in host memory (linear) of a given mipmap level.
    std::size_t GetHostMipmapSize(u32 level) const {
        return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers();
    }

    /// Returns the size in bytes of a converted (e.g. ASTC->RGBA8) mipmap level.
    std::size_t GetConvertedMipmapSize(u32 level) const;

    /// Returns the size of a layer in bytes in guest memory.
    std::size_t GetGuestLayerSize() const {
        return GetLayerSize(false, false);
    }

    /// Returns the size of a layer in bytes in host memory for a given mipmap level.
    std::size_t GetHostLayerSize(u32 level) const {
        ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D);
        return GetInnerMipmapMemorySize(level, true, false);
    }

    /// Returns the max possible mipmap that the texture can have in host gpu
    u32 MaxPossibleMipmap() const {
        const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U;
        const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U;
        const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h);
        if (target != VideoCore::Surface::SurfaceTarget::Texture3D)
            return max_mipmap;
        // 3D textures also mip down along the depth axis.
        return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U);
    }

    /// Returns if the guest surface is a compressed surface.
    bool IsCompressed() const {
        return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1;
    }

    /// Returns the default block width.
    u32 GetDefaultBlockWidth() const {
        return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
    }

    /// Returns the default block height.
    u32 GetDefaultBlockHeight() const {
        return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
    }

    /// Returns the bits per pixel.
    u32 GetBitsPerPixel() const {
        return VideoCore::Surface::GetFormatBpp(pixel_format);
    }

    /// Returns the bytes per pixel.
    u32 GetBytesPerPixel() const {
        return VideoCore::Surface::GetBytesPerPixel(pixel_format);
    }

    /// Returns true if the pixel format is a depth and/or stencil format.
    bool IsPixelFormatZeta() const {
        return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
               pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
    }

    /// Returns how the compression should be handled for this texture.
    SurfaceCompression GetCompressionType() const {
        return VideoCore::Surface::GetFormatCompressionType(pixel_format);
    }

    /// Returns true if the surface is a TextureBuffer type of surface.
    bool IsBuffer() const {
        return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
    }

    /// Returns the debug name of the texture for use in graphic debuggers.
    std::string TargetName() const;

    // Helper used for out of class size calculations
    static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
                                    const u32 block_depth) {
        return Common::AlignBits(out_size,
                                 Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
    }

    /// Converts a width from a type of surface into another. This helps represent the
    /// equivalent value between compressed/non-compressed textures.
    static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from,
                            VideoCore::Surface::PixelFormat pixel_format_to) {
        const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from);
        const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to);
        // Ceiling division keeps partially-covered blocks.
        return (width * bw2 + bw1 - 1) / bw1;
    }

    /// Converts a height from a type of surface into another. This helps represent the
    /// equivalent value between compressed/non-compressed textures.
    static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from,
                             VideoCore::Surface::PixelFormat pixel_format_to) {
        const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from);
        const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to);
        return (height * bh2 + bh1 - 1) / bh1;
    }

    // Finds the maximum possible width between 2 2D layers of different formats
    static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params,
                              const u32 src_level, const u32 dst_level) {
        const u32 bw1 = src_params.GetDefaultBlockWidth();
        const u32 bw2 = dst_params.GetDefaultBlockWidth();
        const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1;
        const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2;
        return std::min(t_src_width, t_dst_width);
    }

    // Finds the maximum possible height between 2 2D layers of different formats
    static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params,
                               const u32 src_level, const u32 dst_level) {
        const u32 bh1 = src_params.GetDefaultBlockHeight();
        const u32 bh2 = dst_params.GetDefaultBlockHeight();
        const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1;
        const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2;
        return std::min(t_src_height, t_dst_height);
    }

    bool is_tiled;
    bool srgb_conversion;
    bool is_layered; // when set, `depth` holds the layer count
    u32 block_width;
    u32 block_height;
    u32 block_depth;
    u32 tile_width_spacing;
    u32 width;
    u32 height;
    u32 depth;
    u32 pitch;
    u32 num_levels;
    u32 emulated_levels;
    VideoCore::Surface::PixelFormat pixel_format;
    VideoCore::Surface::ComponentType component_type;
    VideoCore::Surface::SurfaceType type;
    VideoCore::Surface::SurfaceTarget target;

private:
    /// Returns the size of a given mipmap level inside a layer.
    std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const;

    /// Returns the size of all mipmap levels and aligns as needed.
    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
        return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth);
    }

    /// Returns the size of a layer
    std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;

    /// Returns the number of layers (1 for non-layered surfaces).
    std::size_t GetNumLayers() const {
        return is_layered ? depth : 1;
    }

    /// Returns true if these parameters are from a layered surface.
    bool IsLayered() const;
};
274
275} // namespace VideoCommon
276
277namespace std {
278
/// Allows SurfaceParams to be used as a key in std::unordered_map and friends.
template <>
struct hash<VideoCommon::SurfaceParams> {
    std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
        return k.Hash();
    }
};
285
286} // namespace std
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
new file mode 100644
index 000000000..57a1f5803
--- /dev/null
+++ b/src/video_core/texture_cache/surface_view.cpp
@@ -0,0 +1,23 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/common_types.h"
8#include "video_core/texture_cache/surface_view.h"
9
10namespace VideoCommon {
11
12std::size_t ViewParams::Hash() const {
13 return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^
14 (static_cast<std::size_t>(base_level) << 24) ^
15 (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36);
16}
17
18bool ViewParams::operator==(const ViewParams& rhs) const {
19 return std::tie(base_layer, num_layers, base_level, num_levels, target) ==
20 std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target);
21}
22
23} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h
new file mode 100644
index 000000000..b17fd11a9
--- /dev/null
+++ b/src/video_core/texture_cache/surface_view.h
@@ -0,0 +1,67 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8
9#include "common/common_types.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/surface_params.h"
12
13namespace VideoCommon {
14
15struct ViewParams {
16 constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer,
17 u32 num_layers, u32 base_level, u32 num_levels)
18 : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level},
19 num_levels{num_levels} {}
20
21 std::size_t Hash() const;
22
23 bool operator==(const ViewParams& rhs) const;
24
25 bool IsLayered() const {
26 switch (target) {
27 case VideoCore::Surface::SurfaceTarget::Texture1DArray:
28 case VideoCore::Surface::SurfaceTarget::Texture2DArray:
29 case VideoCore::Surface::SurfaceTarget::TextureCubemap:
30 case VideoCore::Surface::SurfaceTarget::TextureCubeArray:
31 return true;
32 default:
33 return false;
34 }
35 }
36
37 VideoCore::Surface::SurfaceTarget target{};
38 u32 base_layer{};
39 u32 num_layers{};
40 u32 base_level{};
41 u32 num_levels{};
42};
43
/// Common base for backend view objects; stores the immutable ViewParams
/// describing which portion of the parent surface the view exposes.
class ViewBase {
public:
    constexpr explicit ViewBase(const ViewParams& params) : params{params} {}

    /// Returns the parameters this view was created with.
    constexpr const ViewParams& GetViewParams() const {
        return params;
    }

protected:
    ViewParams params;
};
55
56} // namespace VideoCommon
57
58namespace std {
59
/// Allows ViewParams to be used as a key in std::unordered_map and friends.
template <>
struct hash<VideoCommon::ViewParams> {
    std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept {
        return k.Hash();
    }
};
66
67} // namespace std
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
new file mode 100644
index 000000000..877c6635d
--- /dev/null
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -0,0 +1,835 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <memory>
10#include <mutex>
11#include <set>
12#include <tuple>
13#include <unordered_map>
14#include <vector>
15
16#include <boost/icl/interval_map.hpp>
17#include <boost/range/iterator_range.hpp>
18
19#include "common/assert.h"
20#include "common/common_types.h"
21#include "common/math_util.h"
22#include "core/core.h"
23#include "core/memory.h"
24#include "core/settings.h"
25#include "video_core/engines/fermi_2d.h"
26#include "video_core/engines/maxwell_3d.h"
27#include "video_core/gpu.h"
28#include "video_core/memory_manager.h"
29#include "video_core/rasterizer_interface.h"
30#include "video_core/surface.h"
31#include "video_core/texture_cache/copy_params.h"
32#include "video_core/texture_cache/surface_base.h"
33#include "video_core/texture_cache/surface_params.h"
34#include "video_core/texture_cache/surface_view.h"
35
36namespace Tegra::Texture {
37struct FullTextureInfo;
38}
39
40namespace VideoCore {
41class RasterizerInterface;
42}
43
44namespace VideoCommon {
45
46using VideoCore::Surface::PixelFormat;
47
48using VideoCore::Surface::SurfaceTarget;
49using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
50
51template <typename TSurface, typename TView>
52class TextureCache {
53 using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
54 using IntervalType = typename IntervalMap::interval_type;
55
56public:
57 void InvalidateRegion(CacheAddr addr, std::size_t size) {
58 std::lock_guard lock{mutex};
59
60 for (const auto& surface : GetSurfacesInRegion(addr, size)) {
61 Unregister(surface);
62 }
63 }
64
    /***
     * `Guard` guarantees that rendertargets don't unregister themselves if
     * they collide. Protection is currently only done on 3D slices.
     ***/
    void GuardRenderTargets(bool new_guard) {
        guard_render_targets = new_guard;
    }

    /// While set, surfaces handed out as samplers are tracked in
    /// `sampled_textures` (see TextureBarrier).
    void GuardSamplers(bool new_guard) {
        guard_samplers = new_guard;
    }
76
    /// Writes back to guest memory every surface overlapping the given region.
    void FlushRegion(CacheAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

        auto surfaces = GetSurfacesInRegion(addr, size);
        if (surfaces.empty()) {
            return;
        }
        // Flush in modification-tick order so overlapping writes land in the
        // same order the GPU produced them.
        std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
            return a->GetModificationTick() < b->GetModificationTick();
        });
        for (const auto& surface : surfaces) {
            FlushSurface(surface);
        }
    }
91
    /// Returns a view for the texture described by the TIC entry, creating or
    /// recycling the backing surface as needed. Returns a default (null) view
    /// when the TIC address is unmapped.
    TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
                            const VideoCommon::Shader::Sampler& entry) {
        std::lock_guard lock{mutex};
        const auto gpu_addr{tic.Address()};
        if (!gpu_addr) {
            return {};
        }
        const auto params{SurfaceParams::CreateForTexture(tic, entry)};
        const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
        if (guard_samplers) {
            // Track sampled surfaces so TextureBarrier() can detect
            // sample-while-rendering feedback loops.
            sampled_textures.push_back(surface);
        }
        return view;
    }

    /// Same as GetTextureSurface, but for image (read/write) bindings.
    TView GetImageSurface(const Tegra::Texture::TICEntry& tic,
                          const VideoCommon::Shader::Image& entry) {
        std::lock_guard lock{mutex};
        const auto gpu_addr{tic.Address()};
        if (!gpu_addr) {
            return {};
        }
        const auto params{SurfaceParams::CreateForImage(tic, entry)};
        const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
        if (guard_samplers) {
            sampled_textures.push_back(surface);
        }
        return view;
    }
121
    /// Returns true when any surface sampled since the last call is also bound
    /// as a render target (i.e. the backend needs a texture barrier), then
    /// clears the sampled-texture list.
    /// NOTE(review): unlike the other public entry points this does not take
    /// `mutex` — confirm it is only called from the rasterizer thread.
    bool TextureBarrier() {
        const bool any_rt =
            std::any_of(sampled_textures.begin(), sampled_textures.end(),
                        [](const auto& surface) { return surface->IsRenderTarget(); });
        sampled_textures.clear();
        return any_rt;
    }
129
    /// Returns the view for the currently bound depth buffer, or a null view
    /// when no depth buffer is enabled. The surface is only re-resolved when
    /// the Maxwell3D depth-buffer dirty flag is set.
    TView GetDepthBufferSurface(bool preserve_contents) {
        std::lock_guard lock{mutex};
        auto& maxwell3d = system.GPU().Maxwell3D();

        if (!maxwell3d.dirty.depth_buffer) {
            return depth_buffer.view;
        }
        maxwell3d.dirty.depth_buffer = false;

        const auto& regs{maxwell3d.regs};
        const auto gpu_addr{regs.zeta.Address()};
        if (!gpu_addr || !regs.zeta_enable) {
            SetEmptyDepthBuffer();
            return {};
        }
        const auto depth_params{SurfaceParams::CreateForDepthBuffer(
            system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
            regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
        auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
        // Move the render-target protection flag from the old surface to the new one.
        if (depth_buffer.target)
            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
        depth_buffer.target = surface_view.first;
        depth_buffer.view = surface_view.second;
        if (depth_buffer.target)
            depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
        return surface_view.second;
    }
158
    /// Returns the view for the color render target at `index`, or a null view
    /// when the slot is disabled. Only re-resolves the surface when the
    /// corresponding Maxwell3D dirty flag is set.
    TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
        std::lock_guard lock{mutex};
        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
        auto& maxwell3d = system.GPU().Maxwell3D();
        if (!maxwell3d.dirty.render_target[index]) {
            return render_targets[index].view;
        }
        maxwell3d.dirty.render_target[index] = false;

        const auto& regs{maxwell3d.regs};
        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
            regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
            SetEmptyColorBuffer(index);
            return {};
        }

        const auto& config{regs.rt[index]};
        const auto gpu_addr{config.Address()};
        if (!gpu_addr) {
            SetEmptyColorBuffer(index);
            return {};
        }

        auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                                       preserve_contents, true);
        // Move the render-target protection flag from the old surface to the new one.
        if (render_targets[index].target)
            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
        render_targets[index].target = surface_view.first;
        render_targets[index].view = surface_view.second;
        if (render_targets[index].target)
            render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
        return surface_view.second;
    }
192
    /// Marks the color render target at `index` as modified at the current tick.
    void MarkColorBufferInUse(std::size_t index) {
        if (auto& render_target = render_targets[index].target) {
            render_target->MarkAsModified(true, Tick());
        }
    }

    /// Marks the depth buffer as modified at the current tick.
    void MarkDepthBufferInUse() {
        if (depth_buffer.target) {
            depth_buffer.target->MarkAsModified(true, Tick());
        }
    }

    /// Unbinds the depth buffer, clearing its render-target protection.
    void SetEmptyDepthBuffer() {
        if (depth_buffer.target == nullptr) {
            return;
        }
        depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
        depth_buffer.target = nullptr;
        depth_buffer.view = nullptr;
    }

    /// Unbinds the color buffer at `index`, clearing its render-target protection.
    void SetEmptyColorBuffer(std::size_t index) {
        if (render_targets[index].target == nullptr) {
            return;
        }
        render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
        render_targets[index].target = nullptr;
        render_targets[index].view = nullptr;
    }
222
    /// Performs a Fermi2D blit between two cached surfaces, marking the
    /// destination as modified.
    void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
                     const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
                     const Tegra::Engines::Fermi2D::Config& copy_config) {
        std::lock_guard lock{mutex};
        std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config);
        std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config);
        ImageBlit(src_surface.second, dst_surface.second, copy_config);
        dst_surface.first->MarkAsModified(true, Tick());
    }

    /// Looks up a registered surface whose base address exactly matches the
    /// given host pointer; returns nullptr when none is found.
    TSurface TryFindFramebufferSurface(const u8* host_ptr) {
        const CacheAddr cache_addr = ToCacheAddr(host_ptr);
        if (!cache_addr) {
            return nullptr;
        }
        // Only the registry page containing the address needs to be scanned.
        const CacheAddr page = cache_addr >> registry_page_bits;
        std::vector<TSurface>& list = registry[page];
        for (auto& surface : list) {
            if (surface->GetCacheAddr() == cache_addr) {
                return surface;
            }
        }
        return nullptr;
    }

    /// Advances and returns the global modification tick.
    u64 Tick() {
        return ++ticks;
    }
251
252protected:
    /// Initializes the cache with empty render targets, a small staging cache
    /// and the depth<->color "sibling" format table.
    TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
        : system{system}, rasterizer{rasterizer} {
        for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
            SetEmptyColorBuffer(i);
        }

        SetEmptyDepthBuffer();
        staging_cache.SetSize(2);

        // Each depth format and its bit-compatible color counterpart are
        // registered as siblings of each other (both directions).
        const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
            siblings_table[static_cast<std::size_t>(a)] = b;
            siblings_table[static_cast<std::size_t>(b)] = a;
        };
        std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
        make_siblings(PixelFormat::Z16, PixelFormat::R16U);
        make_siblings(PixelFormat::Z32F, PixelFormat::R32F);
        make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F);

        sampled_textures.reserve(64);
    }

    ~TextureCache() = default;

    /// Backend hook: creates a backend surface object for the given parameters.
    virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0;

    /// Backend hook: copies data between two surfaces on the host GPU.
    virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface,
                           const CopyParams& copy_params) = 0;

    /// Backend hook: blits (with possible scaling) between two views.
    virtual void ImageBlit(TView& src_view, TView& dst_view,
                           const Tegra::Engines::Fermi2D::Config& copy_config) = 0;

    // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
    // and reading it from a separate buffer.
    virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
287
    /// Marks the render-target slot the surface was bound to as dirty so it is
    /// re-resolved on next use. `DEPTH_RT` is the sentinel index used for the
    /// depth buffer.
    void ManageRenderTargetUnregister(TSurface& surface) {
        auto& maxwell3d = system.GPU().Maxwell3D();
        const u32 index = surface->GetRenderTarget();
        if (index == DEPTH_RT) {
            maxwell3d.dirty.depth_buffer = true;
        } else {
            maxwell3d.dirty.render_target[index] = true;
        }
        maxwell3d.dirty.render_settings = true;
    }
298
    /// Registers a surface in the cache: resolves its cache/cpu addresses,
    /// inserts it into the inner lookup structures and bumps the rasterizer's
    /// cached-page counters. Bails out (with a critical log) on unmapped
    /// addresses.
    void Register(TSurface surface) {
        const GPUVAddr gpu_addr = surface->GetGpuAddr();
        const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
        const std::size_t size = surface->GetSizeInBytes();
        const std::optional<VAddr> cpu_addr =
            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
        if (!cache_ptr || !cpu_addr) {
            LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
                         gpu_addr);
            return;
        }
        const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
        surface->MarkAsContinuous(continuous);
        surface->SetCacheAddr(cache_ptr);
        surface->SetCpuAddr(*cpu_addr);
        RegisterInnerCache(surface);
        surface->MarkAsRegistered(true);
        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
    }

    /// Removes a surface from the cache and parks it in the reserve so an
    /// identically-parameterized surface can reuse it later. Protected render
    /// targets are skipped while `guard_render_targets` is set.
    void Unregister(TSurface surface) {
        if (guard_render_targets && surface->IsProtected()) {
            return;
        }
        if (!guard_render_targets && surface->IsRenderTarget()) {
            // Dirty the corresponding RT slot so it gets rebound next draw.
            ManageRenderTargetUnregister(surface);
        }
        const std::size_t size = surface->GetSizeInBytes();
        const VAddr cpu_addr = surface->GetCpuAddr();
        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
        UnregisterInnerCache(surface);
        surface->MarkAsRegistered(false);
        ReserveSurface(surface->GetSurfaceParams(), surface);
    }
333
    /// Returns a surface for the given parameters, reusing a reserved surface
    /// with matching parameters when one is available.
    TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
        if (const auto surface = TryGetReservedSurface(params); surface) {
            surface->SetGpuAddr(gpu_addr);
            return surface;
        }
        // No reserved surface available, create a new one and reserve it
        auto new_surface{CreateSurface(gpu_addr, params)};
        return new_surface;
    }

    /// Resolves the surface/view pair for a Fermi2D copy-engine surface config.
    std::pair<TSurface, TView> GetFermiSurface(
        const Tegra::Engines::Fermi2D::Regs::Surface& config) {
        SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config);
        const GPUVAddr gpu_addr = config.Address();
        return GetSurface(gpu_addr, params, true, false);
    }
350
351 Core::System& system;
352
353private:
    /// Strategies for handling overlapping surfaces that can't be matched to a
    /// newly requested surface (see PickStrategy/RecycleSurface).
    enum class RecycleStrategy : u32 {
        Ignore = 0,     ///< Drop the overlaps; optionally reload from guest memory.
        Flush = 1,      ///< Flush the overlaps to guest memory first, then reload.
        BufferCopy = 3, ///< Rebuild the new surface through a raw buffer copy.
    };
359
360 /**
361 * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
362 * @param overlaps, the overlapping surfaces registered in the cache.
363 * @param params, the paremeters on the new surface.
364 * @param gpu_addr, the starting address of the new surface.
365 * @param untopological, tells the recycler that the texture has no way to match the overlaps
366 * due to topological reasons.
367 **/
368 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
369 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
370 if (Settings::values.use_accurate_gpu_emulation) {
371 return RecycleStrategy::Flush;
372 }
373 // 3D Textures decision
374 if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
375 return RecycleStrategy::Flush;
376 }
377 for (auto s : overlaps) {
378 const auto& s_params = s->GetSurfaceParams();
379 if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
380 return RecycleStrategy::Flush;
381 }
382 }
383 // Untopological decision
384 if (untopological == MatchTopologyResult::CompressUnmatch) {
385 return RecycleStrategy::Flush;
386 }
387 if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
388 return RecycleStrategy::Flush;
389 }
390 return RecycleStrategy::Ignore;
391 }
392
    /**
     * `RecycleSurface` is the method we use to decide what to do with textures we can't resolve
     * in the cache. It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters
     * all the overlaps and loads the new texture. Flush flushes all the overlaps into memory and
     * loads the new surface from that data.
     * @param overlaps, the overlapping surfaces registered in the cache.
     * @param params, the parameters of the new surface.
     * @param gpu_addr, the starting address of the new surface.
     * @param preserve_contents, tells if the new surface should be loaded from memory or
     * left blank.
     * @param untopological, tells the recycler that the texture has no way to match the overlaps
     * due to topological reasons.
     **/
    std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
                                              const SurfaceParams& params, const GPUVAddr gpu_addr,
                                              const bool preserve_contents,
                                              const MatchTopologyResult untopological) {
        // Under accurate emulation, contents are always reloaded from guest memory.
        const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
        for (auto& surface : overlaps) {
            Unregister(surface);
        }
        switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
        case RecycleStrategy::Ignore: {
            return InitializeSurface(gpu_addr, params, do_load);
        }
        case RecycleStrategy::Flush: {
            // Flush oldest-first so newer writes land last in guest memory.
            std::sort(overlaps.begin(), overlaps.end(),
                      [](const TSurface& a, const TSurface& b) -> bool {
                          return a->GetModificationTick() < b->GetModificationTick();
                      });
            for (auto& surface : overlaps) {
                FlushSurface(surface);
            }
            return InitializeSurface(gpu_addr, params, preserve_contents);
        }
        case RecycleStrategy::BufferCopy: {
            auto new_surface = GetUncachedSurface(gpu_addr, params);
            BufferCopy(overlaps[0], new_surface);
            return {new_surface, new_surface->GetMainView()};
        }
        default: {
            UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
            return InitializeSurface(gpu_addr, params, do_load);
        }
        }
    }
438
    /**
     * `RebuildSurface` this method takes a single surface and recreates into another that
     * may differ in format, target or width alignment.
     * @param current_surface, the registered surface in the cache which we want to convert.
     * @param params, the new surface params which we'll use to recreate the surface.
     **/
    std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
                                              bool is_render) {
        const auto gpu_addr = current_surface->GetGpuAddr();
        const auto& cr_params = current_surface->GetSurfaceParams();
        TSurface new_surface;
        // When the formats are bit-compatible "siblings" (e.g. depth<->color)
        // and we're not rendering, keep the original format for the rebuild.
        if (cr_params.pixel_format != params.pixel_format && !is_render &&
            GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
            SurfaceParams new_params = params;
            new_params.pixel_format = cr_params.pixel_format;
            new_params.component_type = cr_params.component_type;
            new_params.type = cr_params.type;
            new_surface = GetUncachedSurface(gpu_addr, new_params);
        } else {
            new_surface = GetUncachedSurface(gpu_addr, params);
        }
        const auto& final_params = new_surface->GetSurfaceParams();
        if (cr_params.type != final_params.type ||
            (cr_params.component_type != final_params.component_type)) {
            // Incompatible types need a round-trip through a raw buffer.
            BufferCopy(current_surface, new_surface);
        } else {
            // Otherwise copy the old surface piecewise into the new layout.
            std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
            for (auto& brick : bricks) {
                ImageCopy(current_surface, new_surface, brick);
            }
        }
        Unregister(current_surface);
        Register(new_surface);
        new_surface->MarkAsModified(current_surface->IsModified(), Tick());
        return {new_surface, new_surface->GetMainView()};
    }
475
    /**
     * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's
     * params if it's an exact match, we return the main view of the registered surface. If its
     * format doesn't match, we rebuild the surface. We call this last method a `Mirage`. If formats
     * match but the targets don't, we create an overview View of the registered surface.
     * @param current_surface, the registered surface in the cache which we want to convert.
     * @param params, the new surface params which we want to check.
     **/
    std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
                                                     const SurfaceParams& params, bool is_render) {
        const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
        const bool matches_target = current_surface->MatchTarget(params.target);
        const auto match_check = [&]() -> std::pair<TSurface, TView> {
            if (matches_target) {
                return {current_surface, current_surface->GetMainView()};
            }
            return {current_surface, current_surface->EmplaceOverview(params)};
        };
        if (!is_mirage) {
            return match_check();
        }
        // Sibling (bit-compatible) formats can be reused directly when not rendering.
        if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
            return match_check();
        }
        return RebuildSurface(current_surface, params, is_render);
    }
502
503 /**
504 * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface
505 * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps
506 * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface
507 * for them, else we return nothing.
508 * @param overlaps, the overlapping surfaces registered in the cache.
509 * @param params, the paremeters on the new surface.
510 * @param gpu_addr, the starting address of the new surface.
511 **/
512 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
513 const SurfaceParams& params,
514 const GPUVAddr gpu_addr) {
515 if (params.target == SurfaceTarget::Texture3D) {
516 return {};
517 }
518 bool modified = false;
519 TSurface new_surface = GetUncachedSurface(gpu_addr, params);
520 u32 passed_tests = 0;
521 for (auto& surface : overlaps) {
522 const SurfaceParams& src_params = surface->GetSurfaceParams();
523 if (src_params.is_layered || src_params.num_levels > 1) {
524 // We send this cases to recycle as they are more complex to handle
525 return {};
526 }
527 const std::size_t candidate_size = surface->GetSizeInBytes();
528 auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
529 if (!mipmap_layer) {
530 continue;
531 }
532 const auto [layer, mipmap] = *mipmap_layer;
533 if (new_surface->GetMipmapSize(mipmap) != candidate_size) {
534 continue;
535 }
536 modified |= surface->IsModified();
537 // Now we got all the data set up
538 const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
539 const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
540 const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1);
541 passed_tests++;
542 ImageCopy(surface, new_surface, copy_params);
543 }
544 if (passed_tests == 0) {
545 return {};
546 // In Accurate GPU all tests should pass, else we recycle
547 } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
548 return {};
549 }
550 for (auto surface : overlaps) {
551 Unregister(surface);
552 }
553 new_surface->MarkAsModified(modified, Tick());
554 Register(new_surface);
555 return {{new_surface, new_surface->GetMainView()}};
556 }
557
558 /**
559 * `GetSurface` gets the starting address and parameters of a candidate surface and tries
560 * to find a matching surface within the cache. This is done in 3 big steps. The first is to
561 * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
562 * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from
563 * memory else we move to step 3. Step 3 consists on figuring the relationship between the
564 * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many
565 * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the
566 * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to
567 * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface
568 * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface.
569 * @param gpu_addr, the starting address of the candidate surface.
570 * @param params, the paremeters on the candidate surface.
571 * @param preserve_contents, tells if the new surface should be loaded from meory or left blank.
572 **/
573 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
574 bool preserve_contents, bool is_render) {
575 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
576 const auto cache_addr{ToCacheAddr(host_ptr)};
577
578 // Step 0: guarantee a valid surface
579 if (!cache_addr) {
580 // Return a null surface if it's invalid
581 SurfaceParams new_params = params;
582 new_params.width = 1;
583 new_params.height = 1;
584 new_params.depth = 1;
585 new_params.block_height = 0;
586 new_params.block_depth = 0;
587 return InitializeSurface(gpu_addr, new_params, false);
588 }
589
590 // Step 1
591 // Check Level 1 Cache for a fast structural match. If candidate surface
592 // matches at certain level we are pretty much done.
593 if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
594 TSurface& current_surface = iter->second;
595 const auto topological_result = current_surface->MatchesTopology(params);
596 if (topological_result != MatchTopologyResult::FullMatch) {
597 std::vector<TSurface> overlaps{current_surface};
598 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
599 topological_result);
600 }
601 const auto struct_result = current_surface->MatchesStructure(params);
602 if (struct_result != MatchStructureResult::None &&
603 (params.target != SurfaceTarget::Texture3D ||
604 current_surface->MatchTarget(params.target))) {
605 if (struct_result == MatchStructureResult::FullMatch) {
606 return ManageStructuralMatch(current_surface, params, is_render);
607 } else {
608 return RebuildSurface(current_surface, params, is_render);
609 }
610 }
611 }
612
613 // Step 2
614 // Obtain all possible overlaps in the memory region
615 const std::size_t candidate_size = params.GetGuestSizeInBytes();
616 auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
617
618 // If none are found, we are done. we just load the surface and create it.
619 if (overlaps.empty()) {
620 return InitializeSurface(gpu_addr, params, preserve_contents);
621 }
622
623 // Step 3
624 // Now we need to figure the relationship between the texture and its overlaps
625 // we do a topological test to ensure we can find some relationship. If it fails
626 // inmediatly recycle the texture
627 for (const auto& surface : overlaps) {
628 const auto topological_result = surface->MatchesTopology(params);
629 if (topological_result != MatchTopologyResult::FullMatch) {
630 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
631 topological_result);
632 }
633 }
634
635 // Split cases between 1 overlap or many.
636 if (overlaps.size() == 1) {
637 TSurface current_surface = overlaps[0];
638 // First check if the surface is within the overlap. If not, it means
639 // two things either the candidate surface is a supertexture of the overlap
640 // or they don't match in any known way.
641 if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
642 if (current_surface->GetGpuAddr() == gpu_addr) {
643 std::optional<std::pair<TSurface, TView>> view =
644 TryReconstructSurface(overlaps, params, gpu_addr);
645 if (view) {
646 return *view;
647 }
648 }
649 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
650 MatchTopologyResult::FullMatch);
651 }
652 // Now we check if the candidate is a mipmap/layer of the overlap
653 std::optional<TView> view =
654 current_surface->EmplaceView(params, gpu_addr, candidate_size);
655 if (view) {
656 const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
657 if (is_mirage) {
658 // On a mirage view, we need to recreate the surface under this new view
659 // and then obtain a view again.
660 SurfaceParams new_params = current_surface->GetSurfaceParams();
661 const u32 wh = SurfaceParams::ConvertWidth(
662 new_params.width, new_params.pixel_format, params.pixel_format);
663 const u32 hh = SurfaceParams::ConvertHeight(
664 new_params.height, new_params.pixel_format, params.pixel_format);
665 new_params.width = wh;
666 new_params.height = hh;
667 new_params.pixel_format = params.pixel_format;
668 std::pair<TSurface, TView> pair =
669 RebuildSurface(current_surface, new_params, is_render);
670 std::optional<TView> mirage_view =
671 pair.first->EmplaceView(params, gpu_addr, candidate_size);
672 if (mirage_view)
673 return {pair.first, *mirage_view};
674 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
675 MatchTopologyResult::FullMatch);
676 }
677 return {current_surface, *view};
678 }
679 } else {
680 // If there are many overlaps, odds are they are subtextures of the candidate
681 // surface. We try to construct a new surface based on the candidate parameters,
682 // using the overlaps. If a single overlap fails, this will fail.
683 std::optional<std::pair<TSurface, TView>> view =
684 TryReconstructSurface(overlaps, params, gpu_addr);
685 if (view) {
686 return *view;
687 }
688 }
689 // We failed all the tests, recycle the overlaps into a new texture.
690 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
691 MatchTopologyResult::FullMatch);
692 }
693
694 std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
695 bool preserve_contents) {
696 auto new_surface{GetUncachedSurface(gpu_addr, params)};
697 Register(new_surface);
698 if (preserve_contents) {
699 LoadSurface(new_surface);
700 }
701 return {new_surface, new_surface->GetMainView()};
702 }
703
704 void LoadSurface(const TSurface& surface) {
705 staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
706 surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
707 surface->UploadTexture(staging_cache.GetBuffer(0));
708 surface->MarkAsModified(false, Tick());
709 }
710
711 void FlushSurface(const TSurface& surface) {
712 if (!surface->IsModified()) {
713 return;
714 }
715 staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
716 surface->DownloadTexture(staging_cache.GetBuffer(0));
717 surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
718 surface->MarkAsModified(false, Tick());
719 }
720
721 void RegisterInnerCache(TSurface& surface) {
722 const CacheAddr cache_addr = surface->GetCacheAddr();
723 CacheAddr start = cache_addr >> registry_page_bits;
724 const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
725 l1_cache[cache_addr] = surface;
726 while (start <= end) {
727 registry[start].push_back(surface);
728 start++;
729 }
730 }
731
732 void UnregisterInnerCache(TSurface& surface) {
733 const CacheAddr cache_addr = surface->GetCacheAddr();
734 CacheAddr start = cache_addr >> registry_page_bits;
735 const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
736 l1_cache.erase(cache_addr);
737 while (start <= end) {
738 auto& reg{registry[start]};
739 reg.erase(std::find(reg.begin(), reg.end(), surface));
740 start++;
741 }
742 }
743
744 std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
745 if (size == 0) {
746 return {};
747 }
748 const CacheAddr cache_addr_end = cache_addr + size;
749 CacheAddr start = cache_addr >> registry_page_bits;
750 const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
751 std::vector<TSurface> surfaces;
752 while (start <= end) {
753 std::vector<TSurface>& list = registry[start];
754 for (auto& surface : list) {
755 if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) {
756 surface->MarkAsPicked(true);
757 surfaces.push_back(surface);
758 }
759 }
760 start++;
761 }
762 for (auto& surface : surfaces) {
763 surface->MarkAsPicked(false);
764 }
765 return surfaces;
766 }
767
768 void ReserveSurface(const SurfaceParams& params, TSurface surface) {
769 surface_reserve[params].push_back(std::move(surface));
770 }
771
772 TSurface TryGetReservedSurface(const SurfaceParams& params) {
773 auto search{surface_reserve.find(params)};
774 if (search == surface_reserve.end()) {
775 return {};
776 }
777 for (auto& surface : search->second) {
778 if (!surface->IsRegistered()) {
779 return surface;
780 }
781 }
782 return {};
783 }
784
785 constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
786 return siblings_table[static_cast<std::size_t>(format)];
787 }
788
789 struct FramebufferTargetInfo {
790 TSurface target;
791 TView view;
792 };
793
794 VideoCore::RasterizerInterface& rasterizer;
795
796 u64 ticks{};
797
798 // Guards the cache for protection conflicts.
799 bool guard_render_targets{};
800 bool guard_samplers{};
801
802 // The siblings table is for formats that can inter exchange with one another
803 // without causing issues. This is only valid when a conflict occurs on a non
804 // rendering use.
805 std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;
806
807 // The internal Cache is different for the Texture Cache. It's based on buckets
808 // of 1MB. This fits better for the purpose of this cache as textures are normaly
809 // large in size.
810 static constexpr u64 registry_page_bits{20};
811 static constexpr u64 registry_page_size{1 << registry_page_bits};
812 std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
813
814 static constexpr u32 DEPTH_RT = 8;
815 static constexpr u32 NO_RT = 0xFFFFFFFF;
816
817 // The L1 Cache is used for fast texture lookup before checking the overlaps
818 // This avoids calculating size and other stuffs.
819 std::unordered_map<CacheAddr, TSurface> l1_cache;
820
821 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
822 /// previously been used. This is to prevent surfaces from being constantly created and
823 /// destroyed when used with different surface parameters.
824 std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;
825 std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
826 render_targets;
827 FramebufferTargetInfo depth_buffer;
828
829 std::vector<TSurface> sampled_textures;
830
831 StagingCache staging_cache;
832 std::recursive_mutex mutex;
833};
834
835} // namespace VideoCommon
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
index 82050bd51..f3efa7eb0 100644
--- a/src/video_core/textures/convert.cpp
+++ b/src/video_core/textures/convert.cpp
@@ -62,19 +62,19 @@ static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
62 SwapS8Z24ToZ24S8<true>(data, width, height); 62 SwapS8Z24ToZ24S8<true>(data, width, height);
63} 63}
64 64
65void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, 65void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width,
66 bool convert_astc, bool convert_s8z24) { 66 u32 height, u32 depth, bool convert_astc, bool convert_s8z24) {
67 if (convert_astc && IsPixelFormatASTC(pixel_format)) { 67 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
68 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. 68 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
69 u32 block_width{}; 69 u32 block_width{};
70 u32 block_height{}; 70 u32 block_height{};
71 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); 71 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
72 const std::vector<u8> rgba8_data = 72 const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress(
73 Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); 73 in_data, width, height, depth, block_width, block_height);
74 std::copy(rgba8_data.begin(), rgba8_data.end(), data); 74 std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
75 75
76 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { 76 } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
77 Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height); 77 Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
78 } 78 }
79} 79}
80 80
@@ -90,4 +90,4 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h
90 } 90 }
91} 91}
92 92
93} // namespace Tegra::Texture \ No newline at end of file 93} // namespace Tegra::Texture
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
index 12542e71c..d5d6c77bb 100644
--- a/src/video_core/textures/convert.h
+++ b/src/video_core/textures/convert.h
@@ -12,10 +12,11 @@ enum class PixelFormat;
12 12
13namespace Tegra::Texture { 13namespace Tegra::Texture {
14 14
15void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, 15void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format,
16 u32 height, u32 depth, bool convert_astc, bool convert_s8z24); 16 u32 width, u32 height, u32 depth, bool convert_astc,
17 bool convert_s8z24);
17 18
18void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, 19void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
19 u32 height, u32 depth, bool convert_astc, bool convert_s8z24); 20 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
20 21
21} // namespace Tegra::Texture \ No newline at end of file 22} // namespace Tegra::Texture
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 217805386..7df5f1452 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -36,10 +36,16 @@ struct alignas(64) SwizzleTable {
36 std::array<std::array<u16, M>, N> values{}; 36 std::array<std::array<u16, M>, N> values{};
37}; 37};
38 38
39constexpr u32 gob_size_x = 64; 39constexpr u32 gob_size_x_shift = 6;
40constexpr u32 gob_size_y = 8; 40constexpr u32 gob_size_y_shift = 3;
41constexpr u32 gob_size_z = 1; 41constexpr u32 gob_size_z_shift = 0;
42constexpr u32 gob_size = gob_size_x * gob_size_y * gob_size_z; 42constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift;
43
44constexpr u32 gob_size_x = 1U << gob_size_x_shift;
45constexpr u32 gob_size_y = 1U << gob_size_y_shift;
46constexpr u32 gob_size_z = 1U << gob_size_z_shift;
47constexpr u32 gob_size = 1U << gob_size_shift;
48
43constexpr u32 fast_swizzle_align = 16; 49constexpr u32 fast_swizzle_align = 16;
44 50
45constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); 51constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>();
@@ -171,14 +177,16 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
171void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, 177void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
172 u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, 178 u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
173 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { 179 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
180 const u32 block_height_size{1U << block_height};
181 const u32 block_depth_size{1U << block_depth};
174 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { 182 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) {
175 SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, 183 SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
176 bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, 184 bytes_per_pixel, out_bytes_per_pixel, block_height_size,
177 width_spacing); 185 block_depth_size, width_spacing);
178 } else { 186 } else {
179 SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, 187 SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
180 bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, 188 bytes_per_pixel, out_bytes_per_pixel, block_height_size,
181 width_spacing); 189 block_depth_size, width_spacing);
182 } 190 }
183} 191}
184 192
@@ -248,18 +256,22 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
248} 256}
249 257
250void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 258void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
251 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { 259 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
260 u32 block_height_bit, u32 offset_x, u32 offset_y) {
261 const u32 block_height = 1U << block_height_bit;
252 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / 262 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
253 gob_size_x}; 263 gob_size_x};
254 for (u32 line = 0; line < subrect_height; ++line) { 264 for (u32 line = 0; line < subrect_height; ++line) {
265 const u32 dst_y = line + offset_y;
255 const u32 gob_address_y = 266 const u32 gob_address_y =
256 (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + 267 (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
257 ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; 268 ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
258 const auto& table = legacy_swizzle_table[line % gob_size_y]; 269 const auto& table = legacy_swizzle_table[dst_y % gob_size_y];
259 for (u32 x = 0; x < subrect_width; ++x) { 270 for (u32 x = 0; x < subrect_width; ++x) {
271 const u32 dst_x = x + offset_x;
260 const u32 gob_address = 272 const u32 gob_address =
261 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; 273 gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
262 const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; 274 const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x];
263 u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; 275 u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
264 u8* dest_addr = swizzled_data + swizzled_offset; 276 u8* dest_addr = swizzled_data + swizzled_offset;
265 277
@@ -269,8 +281,9 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
269} 281}
270 282
271void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 283void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
272 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, 284 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
273 u32 offset_x, u32 offset_y) { 285 u32 block_height_bit, u32 offset_x, u32 offset_y) {
286 const u32 block_height = 1U << block_height_bit;
274 for (u32 line = 0; line < subrect_height; ++line) { 287 for (u32 line = 0; line < subrect_height; ++line) {
275 const u32 y2 = line + offset_y; 288 const u32 y2 = line + offset_y;
276 const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + 289 const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@@ -289,8 +302,9 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
289} 302}
290 303
291void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, 304void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
292 const u32 block_height, const std::size_t copy_size, const u8* source_data, 305 const u32 block_height_bit, const std::size_t copy_size, const u8* source_data,
293 u8* swizzle_data) { 306 u8* swizzle_data) {
307 const u32 block_height = 1U << block_height_bit;
294 const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; 308 const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x};
295 std::size_t count = 0; 309 std::size_t count = 0;
296 for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { 310 for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
@@ -356,9 +370,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
356std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 370std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
357 u32 block_height, u32 block_depth) { 371 u32 block_height, u32 block_depth) {
358 if (tiled) { 372 if (tiled) {
359 const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x); 373 const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift);
360 const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height); 374 const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height);
361 const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth); 375 const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth);
362 return aligned_width * aligned_height * aligned_depth; 376 return aligned_width * aligned_height * aligned_depth;
363 } else { 377 } else {
364 return width * height * depth * bytes_per_pixel; 378 return width * height * depth * bytes_per_pixel;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index e072d8401..f1e3952bc 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -12,8 +12,8 @@ namespace Tegra::Texture {
12 12
13// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents 13// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents
14// an small rect of (64/bytes_per_pixel)X8. 14// an small rect of (64/bytes_per_pixel)X8.
15inline std::size_t GetGOBSize() { 15inline std::size_t GetGOBSizeShift() {
16 return 512; 16 return 9;
17} 17}
18 18
19/// Unswizzles a swizzled texture without changing its format. 19/// Unswizzles a swizzled texture without changing its format.
@@ -44,7 +44,8 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
44 44
45/// Copies an untiled subrectangle into a tiled surface. 45/// Copies an untiled subrectangle into a tiled surface.
46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
47 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height); 47 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
48 u32 offset_x, u32 offset_y);
48 49
49/// Copies a tiled subrectangle into a linear surface. 50/// Copies a tiled subrectangle into a linear surface.
50void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 51void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 219bfd559..e36bc2c04 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -52,9 +52,9 @@ enum class TextureFormat : u32 {
52 DXT45 = 0x26, 52 DXT45 = 0x26,
53 DXN1 = 0x27, 53 DXN1 = 0x27,
54 DXN2 = 0x28, 54 DXN2 = 0x28,
55 Z24S8 = 0x29, 55 S8Z24 = 0x29,
56 X8Z24 = 0x2a, 56 X8Z24 = 0x2a,
57 S8Z24 = 0x2b, 57 Z24S8 = 0x2b,
58 X4V4Z24__COV4R4V = 0x2c, 58 X4V4Z24__COV4R4V = 0x2c,
59 X4V4Z24__COV8R8V = 0x2d, 59 X4V4Z24__COV8R8V = 0x2d,
60 V8Z24__COV4R12V = 0x2e, 60 V8Z24__COV4R12V = 0x2e,
@@ -172,12 +172,16 @@ struct TICEntry {
172 BitField<26, 1, u32> use_header_opt_control; 172 BitField<26, 1, u32> use_header_opt_control;
173 BitField<27, 1, u32> depth_texture; 173 BitField<27, 1, u32> depth_texture;
174 BitField<28, 4, u32> max_mip_level; 174 BitField<28, 4, u32> max_mip_level;
175
176 BitField<0, 16, u32> buffer_high_width_minus_one;
175 }; 177 };
176 union { 178 union {
177 BitField<0, 16, u32> width_minus_1; 179 BitField<0, 16, u32> width_minus_1;
178 BitField<22, 1, u32> srgb_conversion; 180 BitField<22, 1, u32> srgb_conversion;
179 BitField<23, 4, TextureType> texture_type; 181 BitField<23, 4, TextureType> texture_type;
180 BitField<29, 3, u32> border_size; 182 BitField<29, 3, u32> border_size;
183
184 BitField<0, 16, u32> buffer_low_width_minus_one;
181 }; 185 };
182 union { 186 union {
183 BitField<0, 16, u32> height_minus_1; 187 BitField<0, 16, u32> height_minus_1;
@@ -206,7 +210,10 @@ struct TICEntry {
206 } 210 }
207 211
208 u32 Width() const { 212 u32 Width() const {
209 return width_minus_1 + 1; 213 if (header_version != TICHeaderVersion::OneDBuffer) {
214 return width_minus_1 + 1;
215 }
216 return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1;
210 } 217 }
211 218
212 u32 Height() const { 219 u32 Height() const {
@@ -219,20 +226,17 @@ struct TICEntry {
219 226
220 u32 BlockWidth() const { 227 u32 BlockWidth() const {
221 ASSERT(IsTiled()); 228 ASSERT(IsTiled());
222 // The block height is stored in log2 format. 229 return block_width;
223 return 1 << block_width;
224 } 230 }
225 231
226 u32 BlockHeight() const { 232 u32 BlockHeight() const {
227 ASSERT(IsTiled()); 233 ASSERT(IsTiled());
228 // The block height is stored in log2 format. 234 return block_height;
229 return 1 << block_height;
230 } 235 }
231 236
232 u32 BlockDepth() const { 237 u32 BlockDepth() const {
233 ASSERT(IsTiled()); 238 ASSERT(IsTiled());
234 // The block height is stored in log2 format. 239 return block_depth;
235 return 1 << block_depth;
236 } 240 }
237 241
238 bool IsTiled() const { 242 bool IsTiled() const {
@@ -240,6 +244,15 @@ struct TICEntry {
240 header_version == TICHeaderVersion::BlockLinearColorKey; 244 header_version == TICHeaderVersion::BlockLinearColorKey;
241 } 245 }
242 246
247 bool IsLineal() const {
248 return header_version == TICHeaderVersion::Pitch ||
249 header_version == TICHeaderVersion::PitchColorKey;
250 }
251
252 bool IsBuffer() const {
253 return header_version == TICHeaderVersion::OneDBuffer;
254 }
255
243 bool IsSrgbConversionEnabled() const { 256 bool IsSrgbConversionEnabled() const {
244 return srgb_conversion != 0; 257 return srgb_conversion != 0;
245 } 258 }
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 3dc0e47d0..f051e17b4 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -1,5 +1,6 @@
1set(CMAKE_AUTOMOC ON) 1set(CMAKE_AUTOMOC ON)
2set(CMAKE_AUTORCC ON) 2set(CMAKE_AUTORCC ON)
3set(CMAKE_AUTOUIC ON)
3set(CMAKE_INCLUDE_CURRENT_DIR ON) 4set(CMAKE_INCLUDE_CURRENT_DIR ON)
4set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules) 5set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
5 6
@@ -7,6 +8,7 @@ add_executable(yuzu
7 Info.plist 8 Info.plist
8 about_dialog.cpp 9 about_dialog.cpp
9 about_dialog.h 10 about_dialog.h
11 aboutdialog.ui
10 applets/error.cpp 12 applets/error.cpp
11 applets/error.h 13 applets/error.h
12 applets/profile_select.cpp 14 applets/profile_select.cpp
@@ -17,42 +19,59 @@ add_executable(yuzu
17 applets/web_browser.h 19 applets/web_browser.h
18 bootmanager.cpp 20 bootmanager.cpp
19 bootmanager.h 21 bootmanager.h
22 compatdb.ui
20 compatibility_list.cpp 23 compatibility_list.cpp
21 compatibility_list.h 24 compatibility_list.h
22 configuration/config.cpp 25 configuration/config.cpp
23 configuration/config.h 26 configuration/config.h
27 configuration/configure.ui
24 configuration/configure_audio.cpp 28 configuration/configure_audio.cpp
25 configuration/configure_audio.h 29 configuration/configure_audio.h
30 configuration/configure_audio.ui
26 configuration/configure_debug.cpp 31 configuration/configure_debug.cpp
27 configuration/configure_debug.h 32 configuration/configure_debug.h
33 configuration/configure_debug.ui
28 configuration/configure_dialog.cpp 34 configuration/configure_dialog.cpp
29 configuration/configure_dialog.h 35 configuration/configure_dialog.h
30 configuration/configure_gamelist.cpp 36 configuration/configure_gamelist.cpp
31 configuration/configure_gamelist.h 37 configuration/configure_gamelist.h
38 configuration/configure_gamelist.ui
32 configuration/configure_general.cpp 39 configuration/configure_general.cpp
33 configuration/configure_general.h 40 configuration/configure_general.h
41 configuration/configure_general.ui
34 configuration/configure_graphics.cpp 42 configuration/configure_graphics.cpp
35 configuration/configure_graphics.h 43 configuration/configure_graphics.h
44 configuration/configure_graphics.ui
36 configuration/configure_hotkeys.cpp 45 configuration/configure_hotkeys.cpp
37 configuration/configure_hotkeys.h 46 configuration/configure_hotkeys.h
47 configuration/configure_hotkeys.ui
38 configuration/configure_input.cpp 48 configuration/configure_input.cpp
39 configuration/configure_input.h 49 configuration/configure_input.h
50 configuration/configure_input.ui
40 configuration/configure_input_player.cpp 51 configuration/configure_input_player.cpp
41 configuration/configure_input_player.h 52 configuration/configure_input_player.h
53 configuration/configure_input_player.ui
42 configuration/configure_input_simple.cpp 54 configuration/configure_input_simple.cpp
43 configuration/configure_input_simple.h 55 configuration/configure_input_simple.h
56 configuration/configure_input_simple.ui
44 configuration/configure_mouse_advanced.cpp 57 configuration/configure_mouse_advanced.cpp
45 configuration/configure_mouse_advanced.h 58 configuration/configure_mouse_advanced.h
59 configuration/configure_mouse_advanced.ui
60 configuration/configure_per_general.cpp
61 configuration/configure_per_general.h
62 configuration/configure_per_general.ui
46 configuration/configure_profile_manager.cpp 63 configuration/configure_profile_manager.cpp
47 configuration/configure_profile_manager.h 64 configuration/configure_profile_manager.h
65 configuration/configure_profile_manager.ui
48 configuration/configure_system.cpp 66 configuration/configure_system.cpp
49 configuration/configure_system.h 67 configuration/configure_system.h
50 configuration/configure_per_general.cpp 68 configuration/configure_system.ui
51 configuration/configure_per_general.h
52 configuration/configure_touchscreen_advanced.cpp 69 configuration/configure_touchscreen_advanced.cpp
53 configuration/configure_touchscreen_advanced.h 70 configuration/configure_touchscreen_advanced.h
71 configuration/configure_touchscreen_advanced.ui
54 configuration/configure_web.cpp 72 configuration/configure_web.cpp
55 configuration/configure_web.h 73 configuration/configure_web.h
74 configuration/configure_web.ui
56 debugger/graphics/graphics_breakpoint_observer.cpp 75 debugger/graphics/graphics_breakpoint_observer.cpp
57 debugger/graphics/graphics_breakpoint_observer.h 76 debugger/graphics/graphics_breakpoint_observer.h
58 debugger/graphics/graphics_breakpoints.cpp 77 debugger/graphics/graphics_breakpoints.cpp
@@ -72,12 +91,14 @@ add_executable(yuzu
72 game_list_worker.h 91 game_list_worker.h
73 loading_screen.cpp 92 loading_screen.cpp
74 loading_screen.h 93 loading_screen.h
94 loading_screen.ui
75 hotkeys.cpp 95 hotkeys.cpp
76 hotkeys.h 96 hotkeys.h
77 main.cpp 97 main.cpp
78 main.h 98 main.h
79 ui_settings.cpp 99 main.ui
80 ui_settings.h 100 uisettings.cpp
101 uisettings.h
81 util/limitable_input_dialog.cpp 102 util/limitable_input_dialog.cpp
82 util/limitable_input_dialog.h 103 util/limitable_input_dialog.h
83 util/sequence_dialog/sequence_dialog.cpp 104 util/sequence_dialog/sequence_dialog.cpp
@@ -89,44 +110,18 @@ add_executable(yuzu
89 yuzu.rc 110 yuzu.rc
90) 111)
91 112
92set(UIS
93 aboutdialog.ui
94 configuration/configure.ui
95 configuration/configure_audio.ui
96 configuration/configure_debug.ui
97 configuration/configure_gamelist.ui
98 configuration/configure_general.ui
99 configuration/configure_graphics.ui
100 configuration/configure_hotkeys.ui
101 configuration/configure_input.ui
102 configuration/configure_input_player.ui
103 configuration/configure_input_simple.ui
104 configuration/configure_mouse_advanced.ui
105 configuration/configure_per_general.ui
106 configuration/configure_profile_manager.ui
107 configuration/configure_system.ui
108 configuration/configure_touchscreen_advanced.ui
109 configuration/configure_web.ui
110 compatdb.ui
111 loading_screen.ui
112 main.ui
113)
114
115file(GLOB COMPAT_LIST 113file(GLOB COMPAT_LIST
116 ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc 114 ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc
117 ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.json) 115 ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.json)
118file(GLOB_RECURSE ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*) 116file(GLOB_RECURSE ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*)
119file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*) 117file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*)
120 118
121qt5_wrap_ui(UI_HDRS ${UIS})
122 119
123target_sources(yuzu 120target_sources(yuzu
124 PRIVATE 121 PRIVATE
125 ${COMPAT_LIST} 122 ${COMPAT_LIST}
126 ${ICONS} 123 ${ICONS}
127 ${THEMES} 124 ${THEMES}
128 ${UI_HDRS}
129 ${UIS}
130) 125)
131 126
132if (APPLE) 127if (APPLE)
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 5a456e603..f594106bf 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -11,7 +11,7 @@
11#include "core/hle/service/hid/controllers/npad.h" 11#include "core/hle/service/hid/controllers/npad.h"
12#include "input_common/main.h" 12#include "input_common/main.h"
13#include "yuzu/configuration/config.h" 13#include "yuzu/configuration/config.h"
14#include "yuzu/ui_settings.h" 14#include "yuzu/uisettings.h"
15 15
16Config::Config() { 16Config::Config() {
17 // TODO: Don't hardcode the path; let the frontend decide where to put the config files. 17 // TODO: Don't hardcode the path; let the frontend decide where to put the config files.
@@ -436,7 +436,6 @@ void Config::ReadControlValues() {
436void Config::ReadCoreValues() { 436void Config::ReadCoreValues() {
437 qt_config->beginGroup(QStringLiteral("Core")); 437 qt_config->beginGroup(QStringLiteral("Core"));
438 438
439 Settings::values.use_cpu_jit = ReadSetting(QStringLiteral("use_cpu_jit"), true).toBool();
440 Settings::values.use_multi_core = ReadSetting(QStringLiteral("use_multi_core"), false).toBool(); 439 Settings::values.use_multi_core = ReadSetting(QStringLiteral("use_multi_core"), false).toBool();
441 440
442 qt_config->endGroup(); 441 qt_config->endGroup();
@@ -475,6 +474,7 @@ void Config::ReadDebuggingValues() {
475 Settings::values.dump_nso = ReadSetting(QStringLiteral("dump_nso"), false).toBool(); 474 Settings::values.dump_nso = ReadSetting(QStringLiteral("dump_nso"), false).toBool();
476 Settings::values.reporting_services = 475 Settings::values.reporting_services =
477 ReadSetting(QStringLiteral("reporting_services"), false).toBool(); 476 ReadSetting(QStringLiteral("reporting_services"), false).toBool();
477 Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool();
478 478
479 qt_config->endGroup(); 479 qt_config->endGroup();
480} 480}
@@ -516,10 +516,38 @@ void Config::ReadPathValues() {
516 516
517 UISettings::values.roms_path = ReadSetting(QStringLiteral("romsPath")).toString(); 517 UISettings::values.roms_path = ReadSetting(QStringLiteral("romsPath")).toString();
518 UISettings::values.symbols_path = ReadSetting(QStringLiteral("symbolsPath")).toString(); 518 UISettings::values.symbols_path = ReadSetting(QStringLiteral("symbolsPath")).toString();
519 UISettings::values.game_directory_path = 519 UISettings::values.screenshot_path = ReadSetting(QStringLiteral("screenshotPath")).toString();
520 UISettings::values.game_dir_deprecated =
520 ReadSetting(QStringLiteral("gameListRootDir"), QStringLiteral(".")).toString(); 521 ReadSetting(QStringLiteral("gameListRootDir"), QStringLiteral(".")).toString();
521 UISettings::values.game_directory_deepscan = 522 UISettings::values.game_dir_deprecated_deepscan =
522 ReadSetting(QStringLiteral("gameListDeepScan"), false).toBool(); 523 ReadSetting(QStringLiteral("gameListDeepScan"), false).toBool();
524 const int gamedirs_size = qt_config->beginReadArray(QStringLiteral("gamedirs"));
525 for (int i = 0; i < gamedirs_size; ++i) {
526 qt_config->setArrayIndex(i);
527 UISettings::GameDir game_dir;
528 game_dir.path = ReadSetting(QStringLiteral("path")).toString();
529 game_dir.deep_scan = ReadSetting(QStringLiteral("deep_scan"), false).toBool();
530 game_dir.expanded = ReadSetting(QStringLiteral("expanded"), true).toBool();
531 UISettings::values.game_dirs.append(game_dir);
532 }
533 qt_config->endArray();
534 // create NAND and SD card directories if empty, these are not removable through the UI,
535 // also carries over old game list settings if present
536 if (UISettings::values.game_dirs.isEmpty()) {
537 UISettings::GameDir game_dir;
538 game_dir.path = QStringLiteral("SDMC");
539 game_dir.expanded = true;
540 UISettings::values.game_dirs.append(game_dir);
541 game_dir.path = QStringLiteral("UserNAND");
542 UISettings::values.game_dirs.append(game_dir);
543 game_dir.path = QStringLiteral("SysNAND");
544 UISettings::values.game_dirs.append(game_dir);
545 if (UISettings::values.game_dir_deprecated != QStringLiteral(".")) {
546 game_dir.path = UISettings::values.game_dir_deprecated;
547 game_dir.deep_scan = UISettings::values.game_dir_deprecated_deepscan;
548 UISettings::values.game_dirs.append(game_dir);
549 }
550 }
523 UISettings::values.recent_files = ReadSetting(QStringLiteral("recentFiles")).toStringList(); 551 UISettings::values.recent_files = ReadSetting(QStringLiteral("recentFiles")).toStringList();
524 552
525 qt_config->endGroup(); 553 qt_config->endGroup();
@@ -829,7 +857,6 @@ void Config::SaveControlValues() {
829void Config::SaveCoreValues() { 857void Config::SaveCoreValues() {
830 qt_config->beginGroup(QStringLiteral("Core")); 858 qt_config->beginGroup(QStringLiteral("Core"));
831 859
832 WriteSetting(QStringLiteral("use_cpu_jit"), Settings::values.use_cpu_jit, true);
833 WriteSetting(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false); 860 WriteSetting(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false);
834 861
835 qt_config->endGroup(); 862 qt_config->endGroup();
@@ -858,6 +885,7 @@ void Config::SaveDebuggingValues() {
858 QString::fromStdString(Settings::values.program_args), QStringLiteral("")); 885 QString::fromStdString(Settings::values.program_args), QStringLiteral(""));
859 WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false); 886 WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false);
860 WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); 887 WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false);
888 WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false);
861 889
862 qt_config->endGroup(); 890 qt_config->endGroup();
863} 891}
@@ -898,10 +926,15 @@ void Config::SavePathValues() {
898 WriteSetting(QStringLiteral("romsPath"), UISettings::values.roms_path); 926 WriteSetting(QStringLiteral("romsPath"), UISettings::values.roms_path);
899 WriteSetting(QStringLiteral("symbolsPath"), UISettings::values.symbols_path); 927 WriteSetting(QStringLiteral("symbolsPath"), UISettings::values.symbols_path);
900 WriteSetting(QStringLiteral("screenshotPath"), UISettings::values.screenshot_path); 928 WriteSetting(QStringLiteral("screenshotPath"), UISettings::values.screenshot_path);
901 WriteSetting(QStringLiteral("gameListRootDir"), UISettings::values.game_directory_path, 929 qt_config->beginWriteArray(QStringLiteral("gamedirs"));
902 QStringLiteral(".")); 930 for (int i = 0; i < UISettings::values.game_dirs.size(); ++i) {
903 WriteSetting(QStringLiteral("gameListDeepScan"), UISettings::values.game_directory_deepscan, 931 qt_config->setArrayIndex(i);
904 false); 932 const auto& game_dir = UISettings::values.game_dirs[i];
933 WriteSetting(QStringLiteral("path"), game_dir.path);
934 WriteSetting(QStringLiteral("deep_scan"), game_dir.deep_scan, false);
935 WriteSetting(QStringLiteral("expanded"), game_dir.expanded, true);
936 }
937 qt_config->endArray();
905 WriteSetting(QStringLiteral("recentFiles"), UISettings::values.recent_files); 938 WriteSetting(QStringLiteral("recentFiles"), UISettings::values.recent_files);
906 939
907 qt_config->endGroup(); 940 qt_config->endGroup();
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index 63426fe4f..5b7e03056 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -12,13 +12,13 @@
12#include "ui_configure_debug.h" 12#include "ui_configure_debug.h"
13#include "yuzu/configuration/configure_debug.h" 13#include "yuzu/configuration/configure_debug.h"
14#include "yuzu/debugger/console.h" 14#include "yuzu/debugger/console.h"
15#include "yuzu/ui_settings.h" 15#include "yuzu/uisettings.h"
16 16
17ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureDebug) { 17ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureDebug) {
18 ui->setupUi(this); 18 ui->setupUi(this);
19 SetConfiguration(); 19 SetConfiguration();
20 20
21 connect(ui->open_log_button, &QPushButton::pressed, []() { 21 connect(ui->open_log_button, &QPushButton::clicked, []() {
22 QString path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::LogDir)); 22 QString path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::LogDir));
23 QDesktopServices::openUrl(QUrl::fromLocalFile(path)); 23 QDesktopServices::openUrl(QUrl::fromLocalFile(path));
24 }); 24 });
@@ -37,6 +37,7 @@ void ConfigureDebug::SetConfiguration() {
37 ui->dump_exefs->setChecked(Settings::values.dump_exefs); 37 ui->dump_exefs->setChecked(Settings::values.dump_exefs);
38 ui->dump_decompressed_nso->setChecked(Settings::values.dump_nso); 38 ui->dump_decompressed_nso->setChecked(Settings::values.dump_nso);
39 ui->reporting_services->setChecked(Settings::values.reporting_services); 39 ui->reporting_services->setChecked(Settings::values.reporting_services);
40 ui->quest_flag->setChecked(Settings::values.quest_flag);
40} 41}
41 42
42void ConfigureDebug::ApplyConfiguration() { 43void ConfigureDebug::ApplyConfiguration() {
@@ -48,6 +49,7 @@ void ConfigureDebug::ApplyConfiguration() {
48 Settings::values.dump_exefs = ui->dump_exefs->isChecked(); 49 Settings::values.dump_exefs = ui->dump_exefs->isChecked();
49 Settings::values.dump_nso = ui->dump_decompressed_nso->isChecked(); 50 Settings::values.dump_nso = ui->dump_decompressed_nso->isChecked();
50 Settings::values.reporting_services = ui->reporting_services->isChecked(); 51 Settings::values.reporting_services = ui->reporting_services->isChecked();
52 Settings::values.quest_flag = ui->quest_flag->isChecked();
51 Debugger::ToggleConsole(); 53 Debugger::ToggleConsole();
52 Log::Filter filter; 54 Log::Filter filter;
53 filter.ParseFilterString(Settings::values.log_filter); 55 filter.ParseFilterString(Settings::values.log_filter);
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui
index 4a7e3dc3d..7e109cef0 100644
--- a/src/yuzu/configuration/configure_debug.ui
+++ b/src/yuzu/configuration/configure_debug.ui
@@ -7,7 +7,7 @@
7 <x>0</x> 7 <x>0</x>
8 <y>0</y> 8 <y>0</y>
9 <width>400</width> 9 <width>400</width>
10 <height>357</height> 10 <height>474</height>
11 </rect> 11 </rect>
12 </property> 12 </property>
13 <property name="windowTitle"> 13 <property name="windowTitle">
@@ -181,6 +181,22 @@
181 </widget> 181 </widget>
182 </item> 182 </item>
183 <item> 183 <item>
184 <widget class="QGroupBox" name="groupBox_5">
185 <property name="title">
186 <string>Advanced</string>
187 </property>
188 <layout class="QVBoxLayout" name="verticalLayout">
189 <item>
190 <widget class="QCheckBox" name="quest_flag">
191 <property name="text">
192 <string>Kiosk (Quest) Mode</string>
193 </property>
194 </widget>
195 </item>
196 </layout>
197 </widget>
198 </item>
199 <item>
184 <spacer name="verticalSpacer"> 200 <spacer name="verticalSpacer">
185 <property name="orientation"> 201 <property name="orientation">
186 <enum>Qt::Vertical</enum> 202 <enum>Qt::Vertical</enum>
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index e636964e3..775e3f2ea 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -68,12 +68,14 @@ void ConfigureDialog::RetranslateUI() {
68 ui->tabWidget->setCurrentIndex(old_index); 68 ui->tabWidget->setCurrentIndex(old_index);
69} 69}
70 70
71Q_DECLARE_METATYPE(QList<QWidget*>);
72
71void ConfigureDialog::PopulateSelectionList() { 73void ConfigureDialog::PopulateSelectionList() {
72 const std::array<std::pair<QString, QStringList>, 4> items{ 74 const std::array<std::pair<QString, QList<QWidget*>>, 4> items{
73 {{tr("General"), {tr("General"), tr("Web"), tr("Debug"), tr("Game List")}}, 75 {{tr("General"), {ui->generalTab, ui->webTab, ui->debugTab, ui->gameListTab}},
74 {tr("System"), {tr("System"), tr("Profiles"), tr("Audio")}}, 76 {tr("System"), {ui->systemTab, ui->profileManagerTab, ui->audioTab}},
75 {tr("Graphics"), {tr("Graphics")}}, 77 {tr("Graphics"), {ui->graphicsTab}},
76 {tr("Controls"), {tr("Input"), tr("Hotkeys")}}}, 78 {tr("Controls"), {ui->inputTab, ui->hotkeysTab}}},
77 }; 79 };
78 80
79 [[maybe_unused]] const QSignalBlocker blocker(ui->selectorList); 81 [[maybe_unused]] const QSignalBlocker blocker(ui->selectorList);
@@ -81,7 +83,7 @@ void ConfigureDialog::PopulateSelectionList() {
81 ui->selectorList->clear(); 83 ui->selectorList->clear();
82 for (const auto& entry : items) { 84 for (const auto& entry : items) {
83 auto* const item = new QListWidgetItem(entry.first); 85 auto* const item = new QListWidgetItem(entry.first);
84 item->setData(Qt::UserRole, entry.second); 86 item->setData(Qt::UserRole, QVariant::fromValue(entry.second));
85 87
86 ui->selectorList->addItem(item); 88 ui->selectorList->addItem(item);
87 } 89 }
@@ -93,24 +95,26 @@ void ConfigureDialog::UpdateVisibleTabs() {
93 return; 95 return;
94 } 96 }
95 97
96 const std::map<QString, QWidget*> widgets = { 98 const std::map<QWidget*, QString> widgets = {
97 {tr("General"), ui->generalTab}, 99 {ui->generalTab, tr("General")},
98 {tr("System"), ui->systemTab}, 100 {ui->systemTab, tr("System")},
99 {tr("Profiles"), ui->profileManagerTab}, 101 {ui->profileManagerTab, tr("Profiles")},
100 {tr("Input"), ui->inputTab}, 102 {ui->inputTab, tr("Input")},
101 {tr("Hotkeys"), ui->hotkeysTab}, 103 {ui->hotkeysTab, tr("Hotkeys")},
102 {tr("Graphics"), ui->graphicsTab}, 104 {ui->graphicsTab, tr("Graphics")},
103 {tr("Audio"), ui->audioTab}, 105 {ui->audioTab, tr("Audio")},
104 {tr("Debug"), ui->debugTab}, 106 {ui->debugTab, tr("Debug")},
105 {tr("Web"), ui->webTab}, 107 {ui->webTab, tr("Web")},
106 {tr("Game List"), ui->gameListTab}, 108 {ui->gameListTab, tr("Game List")},
107 }; 109 };
108 110
109 [[maybe_unused]] const QSignalBlocker blocker(ui->tabWidget); 111 [[maybe_unused]] const QSignalBlocker blocker(ui->tabWidget);
110 112
111 ui->tabWidget->clear(); 113 ui->tabWidget->clear();
112 const QStringList tabs = items[0]->data(Qt::UserRole).toStringList(); 114
113 for (const auto& tab : tabs) { 115 const QList<QWidget*> tabs = qvariant_cast<QList<QWidget*>>(items[0]->data(Qt::UserRole));
114 ui->tabWidget->addTab(widgets.find(tab)->second, tab); 116
117 for (const auto tab : tabs) {
118 ui->tabWidget->addTab(tab, widgets.at(tab));
115 } 119 }
116} 120}
diff --git a/src/yuzu/configuration/configure_gamelist.cpp b/src/yuzu/configuration/configure_gamelist.cpp
index d1724ba89..daedbc33e 100644
--- a/src/yuzu/configuration/configure_gamelist.cpp
+++ b/src/yuzu/configuration/configure_gamelist.cpp
@@ -9,7 +9,7 @@
9#include "core/settings.h" 9#include "core/settings.h"
10#include "ui_configure_gamelist.h" 10#include "ui_configure_gamelist.h"
11#include "yuzu/configuration/configure_gamelist.h" 11#include "yuzu/configuration/configure_gamelist.h"
12#include "yuzu/ui_settings.h" 12#include "yuzu/uisettings.h"
13 13
14namespace { 14namespace {
15constexpr std::array default_icon_sizes{ 15constexpr std::array default_icon_sizes{
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 06d368dfc..10bcd650e 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -6,7 +6,7 @@
6#include "core/settings.h" 6#include "core/settings.h"
7#include "ui_configure_general.h" 7#include "ui_configure_general.h"
8#include "yuzu/configuration/configure_general.h" 8#include "yuzu/configuration/configure_general.h"
9#include "yuzu/ui_settings.h" 9#include "yuzu/uisettings.h"
10 10
11ConfigureGeneral::ConfigureGeneral(QWidget* parent) 11ConfigureGeneral::ConfigureGeneral(QWidget* parent)
12 : QWidget(parent), ui(new Ui::ConfigureGeneral) { 12 : QWidget(parent), ui(new Ui::ConfigureGeneral) {
@@ -20,30 +20,29 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
20 20
21 SetConfiguration(); 21 SetConfiguration();
22 22
23 connect(ui->toggle_deepscan, &QCheckBox::stateChanged, this, 23 connect(ui->toggle_frame_limit, &QCheckBox::toggled, ui->frame_limit, &QSpinBox::setEnabled);
24 [] { UISettings::values.is_game_list_reload_pending.exchange(true); });
25
26 ui->use_cpu_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn());
27} 24}
28 25
29ConfigureGeneral::~ConfigureGeneral() = default; 26ConfigureGeneral::~ConfigureGeneral() = default;
30 27
31void ConfigureGeneral::SetConfiguration() { 28void ConfigureGeneral::SetConfiguration() {
32 ui->toggle_deepscan->setChecked(UISettings::values.game_directory_deepscan);
33 ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing); 29 ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
34 ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot); 30 ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);
35 ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme)); 31 ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
36 ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit); 32
33 ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
34 ui->frame_limit->setEnabled(ui->toggle_frame_limit->isChecked());
35 ui->frame_limit->setValue(Settings::values.frame_limit);
37} 36}
38 37
39void ConfigureGeneral::ApplyConfiguration() { 38void ConfigureGeneral::ApplyConfiguration() {
40 UISettings::values.game_directory_deepscan = ui->toggle_deepscan->isChecked();
41 UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); 39 UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked();
42 UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked(); 40 UISettings::values.select_user_on_boot = ui->toggle_user_on_boot->isChecked();
43 UISettings::values.theme = 41 UISettings::values.theme =
44 ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString(); 42 ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString();
45 43
46 Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked(); 44 Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
45 Settings::values.frame_limit = ui->frame_limit->value();
47} 46}
48 47
49void ConfigureGeneral::changeEvent(QEvent* event) { 48void ConfigureGeneral::changeEvent(QEvent* event) {
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index 1a5721fe7..0bb91d64b 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -25,11 +25,31 @@
25 <item> 25 <item>
26 <layout class="QVBoxLayout" name="GeneralVerticalLayout"> 26 <layout class="QVBoxLayout" name="GeneralVerticalLayout">
27 <item> 27 <item>
28 <widget class="QCheckBox" name="toggle_deepscan"> 28 <layout class="QHBoxLayout" name="horizontalLayout_2">
29 <property name="text"> 29 <item>
30 <string>Search sub-directories for games</string> 30 <widget class="QCheckBox" name="toggle_frame_limit">
31 </property> 31 <property name="text">
32 </widget> 32 <string>Limit Speed Percent</string>
33 </property>
34 </widget>
35 </item>
36 <item>
37 <widget class="QSpinBox" name="frame_limit">
38 <property name="suffix">
39 <string>%</string>
40 </property>
41 <property name="minimum">
42 <number>1</number>
43 </property>
44 <property name="maximum">
45 <number>9999</number>
46 </property>
47 <property name="value">
48 <number>100</number>
49 </property>
50 </widget>
51 </item>
52 </layout>
33 </item> 53 </item>
34 <item> 54 <item>
35 <widget class="QCheckBox" name="toggle_check_exit"> 55 <widget class="QCheckBox" name="toggle_check_exit">
@@ -51,26 +71,6 @@
51 </widget> 71 </widget>
52 </item> 72 </item>
53 <item> 73 <item>
54 <widget class="QGroupBox" name="PerformanceGroupBox">
55 <property name="title">
56 <string>Performance</string>
57 </property>
58 <layout class="QHBoxLayout" name="PerformanceHorizontalLayout">
59 <item>
60 <layout class="QVBoxLayout" name="PerformanceVerticalLayout">
61 <item>
62 <widget class="QCheckBox" name="use_cpu_jit">
63 <property name="text">
64 <string>Enable CPU JIT</string>
65 </property>
66 </widget>
67 </item>
68 </layout>
69 </item>
70 </layout>
71 </widget>
72 </item>
73 <item>
74 <widget class="QGroupBox" name="theme_group_box"> 74 <widget class="QGroupBox" name="theme_group_box">
75 <property name="title"> 75 <property name="title">
76 <string>Theme</string> 76 <string>Theme</string>
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 2b17b250c..2c9e322c9 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -55,7 +55,6 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
55 55
56 SetConfiguration(); 56 SetConfiguration();
57 57
58 connect(ui->toggle_frame_limit, &QCheckBox::toggled, ui->frame_limit, &QSpinBox::setEnabled);
59 connect(ui->bg_button, &QPushButton::clicked, this, [this] { 58 connect(ui->bg_button, &QPushButton::clicked, this, [this] {
60 const QColor new_bg_color = QColorDialog::getColor(bg_color); 59 const QColor new_bg_color = QColorDialog::getColor(bg_color);
61 if (!new_bg_color.isValid()) { 60 if (!new_bg_color.isValid()) {
@@ -72,9 +71,6 @@ void ConfigureGraphics::SetConfiguration() {
72 71
73 ui->resolution_factor_combobox->setCurrentIndex( 72 ui->resolution_factor_combobox->setCurrentIndex(
74 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); 73 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
75 ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
76 ui->frame_limit->setEnabled(ui->toggle_frame_limit->isChecked());
77 ui->frame_limit->setValue(Settings::values.frame_limit);
78 ui->use_disk_shader_cache->setEnabled(runtime_lock); 74 ui->use_disk_shader_cache->setEnabled(runtime_lock);
79 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); 75 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
80 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); 76 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
@@ -89,8 +85,6 @@ void ConfigureGraphics::SetConfiguration() {
89void ConfigureGraphics::ApplyConfiguration() { 85void ConfigureGraphics::ApplyConfiguration() {
90 Settings::values.resolution_factor = 86 Settings::values.resolution_factor =
91 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); 87 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
92 Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
93 Settings::values.frame_limit = ui->frame_limit->value();
94 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); 88 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
95 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); 89 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
96 Settings::values.use_asynchronous_gpu_emulation = 90 Settings::values.use_asynchronous_gpu_emulation =
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 15ab18ecd..0309ee300 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -23,33 +23,6 @@
23 </property> 23 </property>
24 <layout class="QVBoxLayout" name="verticalLayout_2"> 24 <layout class="QVBoxLayout" name="verticalLayout_2">
25 <item> 25 <item>
26 <layout class="QHBoxLayout" name="horizontalLayout_2">
27 <item>
28 <widget class="QCheckBox" name="toggle_frame_limit">
29 <property name="text">
30 <string>Limit Speed Percent</string>
31 </property>
32 </widget>
33 </item>
34 <item>
35 <widget class="QSpinBox" name="frame_limit">
36 <property name="suffix">
37 <string>%</string>
38 </property>
39 <property name="minimum">
40 <number>1</number>
41 </property>
42 <property name="maximum">
43 <number>9999</number>
44 </property>
45 <property name="value">
46 <number>100</number>
47 </property>
48 </widget>
49 </item>
50 </layout>
51 </item>
52 <item>
53 <widget class="QCheckBox" name="use_disk_shader_cache"> 26 <widget class="QCheckBox" name="use_disk_shader_cache">
54 <property name="text"> 27 <property name="text">
55 <string>Use disk shader cache</string> 28 <string>Use disk shader cache</string>
diff --git a/src/yuzu/configuration/configure_input.cpp b/src/yuzu/configuration/configure_input.cpp
index 4dd775aab..7613197f2 100644
--- a/src/yuzu/configuration/configure_input.cpp
+++ b/src/yuzu/configuration/configure_input.cpp
@@ -79,7 +79,7 @@ ConfigureInput::ConfigureInput(QWidget* parent)
79 LoadConfiguration(); 79 LoadConfiguration();
80 UpdateUIEnabled(); 80 UpdateUIEnabled();
81 81
82 connect(ui->restore_defaults_button, &QPushButton::pressed, this, 82 connect(ui->restore_defaults_button, &QPushButton::clicked, this,
83 &ConfigureInput::RestoreDefaults); 83 &ConfigureInput::RestoreDefaults);
84 84
85 for (auto* enabled : players_controller) { 85 for (auto* enabled : players_controller) {
@@ -96,20 +96,20 @@ ConfigureInput::ConfigureInput(QWidget* parent)
96 &ConfigureInput::UpdateUIEnabled); 96 &ConfigureInput::UpdateUIEnabled);
97 97
98 for (std::size_t i = 0; i < players_configure.size(); ++i) { 98 for (std::size_t i = 0; i < players_configure.size(); ++i) {
99 connect(players_configure[i], &QPushButton::pressed, this, 99 connect(players_configure[i], &QPushButton::clicked, this,
100 [this, i] { CallConfigureDialog<ConfigureInputPlayer>(*this, i, false); }); 100 [this, i] { CallConfigureDialog<ConfigureInputPlayer>(*this, i, false); });
101 } 101 }
102 102
103 connect(ui->handheld_configure, &QPushButton::pressed, this, 103 connect(ui->handheld_configure, &QPushButton::clicked, this,
104 [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 8, false); }); 104 [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 8, false); });
105 105
106 connect(ui->debug_configure, &QPushButton::pressed, this, 106 connect(ui->debug_configure, &QPushButton::clicked, this,
107 [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 9, true); }); 107 [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 9, true); });
108 108
109 connect(ui->mouse_advanced, &QPushButton::pressed, this, 109 connect(ui->mouse_advanced, &QPushButton::clicked, this,
110 [this] { CallConfigureDialog<ConfigureMouseAdvanced>(*this); }); 110 [this] { CallConfigureDialog<ConfigureMouseAdvanced>(*this); });
111 111
112 connect(ui->touchscreen_advanced, &QPushButton::pressed, this, 112 connect(ui->touchscreen_advanced, &QPushButton::clicked, this,
113 [this] { CallConfigureDialog<ConfigureTouchscreenAdvanced>(*this); }); 113 [this] { CallConfigureDialog<ConfigureTouchscreenAdvanced>(*this); });
114} 114}
115 115
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index 916baccc1..a968cfb5d 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -244,7 +244,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
244 } 244 }
245 245
246 button->setContextMenuPolicy(Qt::CustomContextMenu); 246 button->setContextMenuPolicy(Qt::CustomContextMenu);
247 connect(button, &QPushButton::released, [=] { 247 connect(button, &QPushButton::clicked, [=] {
248 HandleClick( 248 HandleClick(
249 button_map[button_id], 249 button_map[button_id],
250 [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, 250 [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; },
@@ -273,7 +273,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
273 } 273 }
274 274
275 analog_button->setContextMenuPolicy(Qt::CustomContextMenu); 275 analog_button->setContextMenuPolicy(Qt::CustomContextMenu);
276 connect(analog_button, &QPushButton::released, [=]() { 276 connect(analog_button, &QPushButton::clicked, [=]() {
277 HandleClick(analog_map_buttons[analog_id][sub_button_id], 277 HandleClick(analog_map_buttons[analog_id][sub_button_id],
278 [=](const Common::ParamPackage& params) { 278 [=](const Common::ParamPackage& params) {
279 SetAnalogButton(params, analogs_param[analog_id], 279 SetAnalogButton(params, analogs_param[analog_id],
@@ -300,19 +300,22 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
300 menu_location)); 300 menu_location));
301 }); 301 });
302 } 302 }
303 connect(analog_map_stick[analog_id], &QPushButton::released, [=] { 303 connect(analog_map_stick[analog_id], &QPushButton::clicked, [=] {
304 QMessageBox::information(this, tr("Information"), 304 if (QMessageBox::information(
305 tr("After pressing OK, first move your joystick horizontally, " 305 this, tr("Information"),
306 "and then vertically.")); 306 tr("After pressing OK, first move your joystick horizontally, "
307 HandleClick( 307 "and then vertically."),
308 analog_map_stick[analog_id], 308 QMessageBox::Ok | QMessageBox::Cancel) == QMessageBox::Ok) {
309 [=](const Common::ParamPackage& params) { analogs_param[analog_id] = params; }, 309 HandleClick(
310 InputCommon::Polling::DeviceType::Analog); 310 analog_map_stick[analog_id],
311 [=](const Common::ParamPackage& params) { analogs_param[analog_id] = params; },
312 InputCommon::Polling::DeviceType::Analog);
313 }
311 }); 314 });
312 } 315 }
313 316
314 connect(ui->buttonClearAll, &QPushButton::released, [this] { ClearAll(); }); 317 connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); });
315 connect(ui->buttonRestoreDefaults, &QPushButton::released, [this] { RestoreDefaults(); }); 318 connect(ui->buttonRestoreDefaults, &QPushButton::clicked, [this] { RestoreDefaults(); });
316 319
317 timeout_timer->setSingleShot(true); 320 timeout_timer->setSingleShot(true);
318 connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); 321 connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); });
diff --git a/src/yuzu/configuration/configure_input_simple.cpp b/src/yuzu/configuration/configure_input_simple.cpp
index 864803ea3..ab3a11d30 100644
--- a/src/yuzu/configuration/configure_input_simple.cpp
+++ b/src/yuzu/configuration/configure_input_simple.cpp
@@ -9,7 +9,7 @@
9#include "yuzu/configuration/configure_input.h" 9#include "yuzu/configuration/configure_input.h"
10#include "yuzu/configuration/configure_input_player.h" 10#include "yuzu/configuration/configure_input_player.h"
11#include "yuzu/configuration/configure_input_simple.h" 11#include "yuzu/configuration/configure_input_simple.h"
12#include "yuzu/ui_settings.h" 12#include "yuzu/uisettings.h"
13 13
14namespace { 14namespace {
15 15
@@ -101,7 +101,7 @@ ConfigureInputSimple::ConfigureInputSimple(QWidget* parent)
101 101
102 connect(ui->profile_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, 102 connect(ui->profile_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this,
103 &ConfigureInputSimple::OnSelectProfile); 103 &ConfigureInputSimple::OnSelectProfile);
104 connect(ui->profile_configure, &QPushButton::pressed, this, &ConfigureInputSimple::OnConfigure); 104 connect(ui->profile_configure, &QPushButton::clicked, this, &ConfigureInputSimple::OnConfigure);
105 105
106 LoadConfiguration(); 106 LoadConfiguration();
107} 107}
diff --git a/src/yuzu/configuration/configure_mouse_advanced.cpp b/src/yuzu/configuration/configure_mouse_advanced.cpp
index b7305e653..0a4abe34f 100644
--- a/src/yuzu/configuration/configure_mouse_advanced.cpp
+++ b/src/yuzu/configuration/configure_mouse_advanced.cpp
@@ -83,7 +83,7 @@ ConfigureMouseAdvanced::ConfigureMouseAdvanced(QWidget* parent)
83 } 83 }
84 84
85 button->setContextMenuPolicy(Qt::CustomContextMenu); 85 button->setContextMenuPolicy(Qt::CustomContextMenu);
86 connect(button, &QPushButton::released, [=] { 86 connect(button, &QPushButton::clicked, [=] {
87 HandleClick( 87 HandleClick(
88 button_map[button_id], 88 button_map[button_id],
89 [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; }, 89 [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; },
@@ -104,8 +104,8 @@ ConfigureMouseAdvanced::ConfigureMouseAdvanced(QWidget* parent)
104 }); 104 });
105 } 105 }
106 106
107 connect(ui->buttonClearAll, &QPushButton::released, [this] { ClearAll(); }); 107 connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); });
108 connect(ui->buttonRestoreDefaults, &QPushButton::released, [this] { RestoreDefaults(); }); 108 connect(ui->buttonRestoreDefaults, &QPushButton::clicked, [this] { RestoreDefaults(); });
109 109
110 timeout_timer->setSingleShot(true); 110 timeout_timer->setSingleShot(true);
111 connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); }); 111 connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); });
diff --git a/src/yuzu/configuration/configure_per_general.cpp b/src/yuzu/configuration/configure_per_general.cpp
index 90336e235..d7f259f12 100644
--- a/src/yuzu/configuration/configure_per_general.cpp
+++ b/src/yuzu/configuration/configure_per_general.cpp
@@ -23,7 +23,7 @@
23#include "yuzu/configuration/config.h" 23#include "yuzu/configuration/config.h"
24#include "yuzu/configuration/configure_input.h" 24#include "yuzu/configuration/configure_input.h"
25#include "yuzu/configuration/configure_per_general.h" 25#include "yuzu/configuration/configure_per_general.h"
26#include "yuzu/ui_settings.h" 26#include "yuzu/uisettings.h"
27#include "yuzu/util/util.h" 27#include "yuzu/util/util.h"
28 28
29ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id) 29ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id)
diff --git a/src/yuzu/configuration/configure_profile_manager.cpp b/src/yuzu/configuration/configure_profile_manager.cpp
index c90f4cdd8..f53423440 100644
--- a/src/yuzu/configuration/configure_profile_manager.cpp
+++ b/src/yuzu/configuration/configure_profile_manager.cpp
@@ -108,10 +108,10 @@ ConfigureProfileManager ::ConfigureProfileManager(QWidget* parent)
108 108
109 connect(tree_view, &QTreeView::clicked, this, &ConfigureProfileManager::SelectUser); 109 connect(tree_view, &QTreeView::clicked, this, &ConfigureProfileManager::SelectUser);
110 110
111 connect(ui->pm_add, &QPushButton::pressed, this, &ConfigureProfileManager::AddUser); 111 connect(ui->pm_add, &QPushButton::clicked, this, &ConfigureProfileManager::AddUser);
112 connect(ui->pm_rename, &QPushButton::pressed, this, &ConfigureProfileManager::RenameUser); 112 connect(ui->pm_rename, &QPushButton::clicked, this, &ConfigureProfileManager::RenameUser);
113 connect(ui->pm_remove, &QPushButton::pressed, this, &ConfigureProfileManager::DeleteUser); 113 connect(ui->pm_remove, &QPushButton::clicked, this, &ConfigureProfileManager::DeleteUser);
114 connect(ui->pm_set_image, &QPushButton::pressed, this, &ConfigureProfileManager::SetUserImage); 114 connect(ui->pm_set_image, &QPushButton::clicked, this, &ConfigureProfileManager::SetUserImage);
115 115
116 scene = new QGraphicsScene; 116 scene = new QGraphicsScene;
117 ui->current_user_icon->setScene(scene); 117 ui->current_user_icon->setScene(scene);
diff --git a/src/yuzu/configuration/configure_touchscreen_advanced.cpp b/src/yuzu/configuration/configure_touchscreen_advanced.cpp
index 8ced28c75..7d7cc00b7 100644
--- a/src/yuzu/configuration/configure_touchscreen_advanced.cpp
+++ b/src/yuzu/configuration/configure_touchscreen_advanced.cpp
@@ -11,7 +11,7 @@ ConfigureTouchscreenAdvanced::ConfigureTouchscreenAdvanced(QWidget* parent)
11 : QDialog(parent), ui(std::make_unique<Ui::ConfigureTouchscreenAdvanced>()) { 11 : QDialog(parent), ui(std::make_unique<Ui::ConfigureTouchscreenAdvanced>()) {
12 ui->setupUi(this); 12 ui->setupUi(this);
13 13
14 connect(ui->restore_defaults_button, &QPushButton::pressed, this, 14 connect(ui->restore_defaults_button, &QPushButton::clicked, this,
15 &ConfigureTouchscreenAdvanced::RestoreDefaults); 15 &ConfigureTouchscreenAdvanced::RestoreDefaults);
16 16
17 LoadConfiguration(); 17 LoadConfiguration();
diff --git a/src/yuzu/configuration/configure_web.cpp b/src/yuzu/configuration/configure_web.cpp
index 5a70ef168..336b062b3 100644
--- a/src/yuzu/configuration/configure_web.cpp
+++ b/src/yuzu/configuration/configure_web.cpp
@@ -9,7 +9,7 @@
9#include "core/telemetry_session.h" 9#include "core/telemetry_session.h"
10#include "ui_configure_web.h" 10#include "ui_configure_web.h"
11#include "yuzu/configuration/configure_web.h" 11#include "yuzu/configuration/configure_web.h"
12#include "yuzu/ui_settings.h" 12#include "yuzu/uisettings.h"
13 13
14ConfigureWeb::ConfigureWeb(QWidget* parent) 14ConfigureWeb::ConfigureWeb(QWidget* parent)
15 : QWidget(parent), ui(std::make_unique<Ui::ConfigureWeb>()) { 15 : QWidget(parent), ui(std::make_unique<Ui::ConfigureWeb>()) {
diff --git a/src/yuzu/debugger/console.cpp b/src/yuzu/debugger/console.cpp
index 320898f6a..207ff4d58 100644
--- a/src/yuzu/debugger/console.cpp
+++ b/src/yuzu/debugger/console.cpp
@@ -10,7 +10,7 @@
10 10
11#include "common/logging/backend.h" 11#include "common/logging/backend.h"
12#include "yuzu/debugger/console.h" 12#include "yuzu/debugger/console.h"
13#include "yuzu/ui_settings.h" 13#include "yuzu/uisettings.h"
14 14
15namespace Debugger { 15namespace Debugger {
16void ToggleConsole() { 16void ToggleConsole() {
diff --git a/src/yuzu/discord_impl.cpp b/src/yuzu/discord_impl.cpp
index 9d87a41eb..ea0079353 100644
--- a/src/yuzu/discord_impl.cpp
+++ b/src/yuzu/discord_impl.cpp
@@ -9,7 +9,7 @@
9#include "core/core.h" 9#include "core/core.h"
10#include "core/loader/loader.h" 10#include "core/loader/loader.h"
11#include "yuzu/discord_impl.h" 11#include "yuzu/discord_impl.h"
12#include "yuzu/ui_settings.h" 12#include "yuzu/uisettings.h"
13 13
14namespace DiscordRPC { 14namespace DiscordRPC {
15 15
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 1885587af..d5fab2f1f 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -23,7 +23,7 @@
23#include "yuzu/game_list_p.h" 23#include "yuzu/game_list_p.h"
24#include "yuzu/game_list_worker.h" 24#include "yuzu/game_list_worker.h"
25#include "yuzu/main.h" 25#include "yuzu/main.h"
26#include "yuzu/ui_settings.h" 26#include "yuzu/uisettings.h"
27 27
28GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist) : gamelist{gamelist} {} 28GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist) : gamelist{gamelist} {}
29 29
@@ -34,7 +34,6 @@ bool GameListSearchField::KeyReleaseEater::eventFilter(QObject* obj, QEvent* eve
34 return QObject::eventFilter(obj, event); 34 return QObject::eventFilter(obj, event);
35 35
36 QKeyEvent* keyEvent = static_cast<QKeyEvent*>(event); 36 QKeyEvent* keyEvent = static_cast<QKeyEvent*>(event);
37 int rowCount = gamelist->tree_view->model()->rowCount();
38 QString edit_filter_text = gamelist->search_field->edit_filter->text().toLower(); 37 QString edit_filter_text = gamelist->search_field->edit_filter->text().toLower();
39 38
40 // If the searchfield's text hasn't changed special function keys get checked 39 // If the searchfield's text hasn't changed special function keys get checked
@@ -56,19 +55,9 @@ bool GameListSearchField::KeyReleaseEater::eventFilter(QObject* obj, QEvent* eve
56 // If there is only one result launch this game 55 // If there is only one result launch this game
57 case Qt::Key_Return: 56 case Qt::Key_Return:
58 case Qt::Key_Enter: { 57 case Qt::Key_Enter: {
59 QStandardItemModel* item_model = new QStandardItemModel(gamelist->tree_view); 58 if (gamelist->search_field->visible == 1) {
60 QModelIndex root_index = item_model->invisibleRootItem()->index(); 59 QString file_path = gamelist->getLastFilterResultItem();
61 QStandardItem* child_file; 60
62 QString file_path;
63 int resultCount = 0;
64 for (int i = 0; i < rowCount; ++i) {
65 if (!gamelist->tree_view->isRowHidden(i, root_index)) {
66 ++resultCount;
67 child_file = gamelist->item_model->item(i, 0);
68 file_path = child_file->data(GameListItemPath::FullPathRole).toString();
69 }
70 }
71 if (resultCount == 1) {
72 // To avoid loading error dialog loops while confirming them using enter 61 // To avoid loading error dialog loops while confirming them using enter
73 // Also users usually want to run a different game after closing one 62 // Also users usually want to run a different game after closing one
74 gamelist->search_field->edit_filter->clear(); 63 gamelist->search_field->edit_filter->clear();
@@ -88,9 +77,31 @@ bool GameListSearchField::KeyReleaseEater::eventFilter(QObject* obj, QEvent* eve
88} 77}
89 78
90void GameListSearchField::setFilterResult(int visible, int total) { 79void GameListSearchField::setFilterResult(int visible, int total) {
80 this->visible = visible;
81 this->total = total;
82
91 label_filter_result->setText(tr("%1 of %n result(s)", "", total).arg(visible)); 83 label_filter_result->setText(tr("%1 of %n result(s)", "", total).arg(visible));
92} 84}
93 85
86QString GameList::getLastFilterResultItem() const {
87 QStandardItem* folder;
88 QStandardItem* child;
89 QString file_path;
90 const int folder_count = item_model->rowCount();
91 for (int i = 0; i < folder_count; ++i) {
92 folder = item_model->item(i, 0);
93 const QModelIndex folder_index = folder->index();
94 const int children_count = folder->rowCount();
95 for (int j = 0; j < children_count; ++j) {
96 if (!tree_view->isRowHidden(j, folder_index)) {
97 child = folder->child(j, 0);
98 file_path = child->data(GameListItemPath::FullPathRole).toString();
99 }
100 }
101 }
102 return file_path;
103}
104
94void GameListSearchField::clear() { 105void GameListSearchField::clear() {
95 edit_filter->clear(); 106 edit_filter->clear();
96} 107}
@@ -147,45 +158,120 @@ static bool ContainsAllWords(const QString& haystack, const QString& userinput)
147 [&haystack](const QString& s) { return haystack.contains(s); }); 158 [&haystack](const QString& s) { return haystack.contains(s); });
148} 159}
149 160
161// Syncs the expanded state of Game Directories with settings to persist across sessions
162void GameList::onItemExpanded(const QModelIndex& item) {
163 const auto type = item.data(GameListItem::TypeRole).value<GameListItemType>();
164 if (type == GameListItemType::CustomDir || type == GameListItemType::SdmcDir ||
165 type == GameListItemType::UserNandDir || type == GameListItemType::SysNandDir)
166 item.data(GameListDir::GameDirRole).value<UISettings::GameDir*>()->expanded =
167 tree_view->isExpanded(item);
168}
169
150// Event in order to filter the gamelist after editing the searchfield 170// Event in order to filter the gamelist after editing the searchfield
151void GameList::onTextChanged(const QString& new_text) { 171void GameList::onTextChanged(const QString& new_text) {
152 const int row_count = tree_view->model()->rowCount(); 172 const int folder_count = tree_view->model()->rowCount();
153 const QString edit_filter_text = new_text.toLower(); 173 QString edit_filter_text = new_text.toLower();
154 const QModelIndex root_index = item_model->invisibleRootItem()->index(); 174 QStandardItem* folder;
175 QStandardItem* child;
176 int children_total = 0;
177 QModelIndex root_index = item_model->invisibleRootItem()->index();
155 178
156 // If the searchfield is empty every item is visible 179 // If the searchfield is empty every item is visible
157 // Otherwise the filter gets applied 180 // Otherwise the filter gets applied
158 if (edit_filter_text.isEmpty()) { 181 if (edit_filter_text.isEmpty()) {
159 for (int i = 0; i < row_count; ++i) { 182 for (int i = 0; i < folder_count; ++i) {
160 tree_view->setRowHidden(i, root_index, false); 183 folder = item_model->item(i, 0);
184 const QModelIndex folder_index = folder->index();
185 const int children_count = folder->rowCount();
186 for (int j = 0; j < children_count; ++j) {
187 ++children_total;
188 tree_view->setRowHidden(j, folder_index, false);
189 }
161 } 190 }
162 search_field->setFilterResult(row_count, row_count); 191 search_field->setFilterResult(children_total, children_total);
163 } else { 192 } else {
164 int result_count = 0; 193 int result_count = 0;
165 for (int i = 0; i < row_count; ++i) { 194 for (int i = 0; i < folder_count; ++i) {
166 const QStandardItem* child_file = item_model->item(i, 0); 195 folder = item_model->item(i, 0);
167 const QString file_path = 196 const QModelIndex folder_index = folder->index();
168 child_file->data(GameListItemPath::FullPathRole).toString().toLower(); 197 const int children_count = folder->rowCount();
169 const QString file_title = 198 for (int j = 0; j < children_count; ++j) {
170 child_file->data(GameListItemPath::TitleRole).toString().toLower(); 199 ++children_total;
171 const QString file_program_id = 200 const QStandardItem* child = folder->child(j, 0);
172 child_file->data(GameListItemPath::ProgramIdRole).toString().toLower(); 201 const QString file_path =
173 202 child->data(GameListItemPath::FullPathRole).toString().toLower();
174 // Only items which filename in combination with its title contains all words 203 const QString file_title =
175 // that are in the searchfield will be visible in the gamelist 204 child->data(GameListItemPath::TitleRole).toString().toLower();
176 // The search is case insensitive because of toLower() 205 const QString file_program_id =
177 // I decided not to use Qt::CaseInsensitive in containsAllWords to prevent 206 child->data(GameListItemPath::ProgramIdRole).toString().toLower();
178 // multiple conversions of edit_filter_text for each game in the gamelist 207
179 const QString file_name = file_path.mid(file_path.lastIndexOf(QLatin1Char{'/'}) + 1) + 208 // Only items which filename in combination with its title contains all words
180 QLatin1Char{' '} + file_title; 209 // that are in the searchfield will be visible in the gamelist
181 if (ContainsAllWords(file_name, edit_filter_text) || 210 // The search is case insensitive because of toLower()
182 (file_program_id.count() == 16 && edit_filter_text.contains(file_program_id))) { 211 // I decided not to use Qt::CaseInsensitive in containsAllWords to prevent
183 tree_view->setRowHidden(i, root_index, false); 212 // multiple conversions of edit_filter_text for each game in the gamelist
184 ++result_count; 213 const QString file_name =
185 } else { 214 file_path.mid(file_path.lastIndexOf(QLatin1Char{'/'}) + 1) + QLatin1Char{' '} +
186 tree_view->setRowHidden(i, root_index, true); 215 file_title;
216 if (ContainsAllWords(file_name, edit_filter_text) ||
217 (file_program_id.count() == 16 && edit_filter_text.contains(file_program_id))) {
218 tree_view->setRowHidden(j, folder_index, false);
219 ++result_count;
220 } else {
221 tree_view->setRowHidden(j, folder_index, true);
222 }
223 search_field->setFilterResult(result_count, children_total);
187 } 224 }
188 search_field->setFilterResult(result_count, row_count); 225 }
226 }
227}
228
229void GameList::onUpdateThemedIcons() {
230 for (int i = 0; i < item_model->invisibleRootItem()->rowCount(); i++) {
231 QStandardItem* child = item_model->invisibleRootItem()->child(i);
232
233 const int icon_size = std::min(static_cast<int>(UISettings::values.icon_size), 64);
234 switch (child->data(GameListItem::TypeRole).value<GameListItemType>()) {
235 case GameListItemType::SdmcDir:
236 child->setData(
237 QIcon::fromTheme(QStringLiteral("sd_card"))
238 .pixmap(icon_size)
239 .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
240 Qt::DecorationRole);
241 break;
242 case GameListItemType::UserNandDir:
243 child->setData(
244 QIcon::fromTheme(QStringLiteral("chip"))
245 .pixmap(icon_size)
246 .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
247 Qt::DecorationRole);
248 break;
249 case GameListItemType::SysNandDir:
250 child->setData(
251 QIcon::fromTheme(QStringLiteral("chip"))
252 .pixmap(icon_size)
253 .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
254 Qt::DecorationRole);
255 break;
256 case GameListItemType::CustomDir: {
257 const UISettings::GameDir* game_dir =
258 child->data(GameListDir::GameDirRole).value<UISettings::GameDir*>();
259 const QString icon_name = QFileInfo::exists(game_dir->path)
260 ? QStringLiteral("folder")
261 : QStringLiteral("bad_folder");
262 child->setData(
263 QIcon::fromTheme(icon_name).pixmap(icon_size).scaled(
264 icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
265 Qt::DecorationRole);
266 break;
267 }
268 case GameListItemType::AddDir:
269 child->setData(
270 QIcon::fromTheme(QStringLiteral("plus"))
271 .pixmap(icon_size)
272 .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
273 Qt::DecorationRole);
274 break;
189 } 275 }
190 } 276 }
191} 277}
@@ -214,7 +300,6 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvide
214 tree_view->setHorizontalScrollMode(QHeaderView::ScrollPerPixel); 300 tree_view->setHorizontalScrollMode(QHeaderView::ScrollPerPixel);
215 tree_view->setSortingEnabled(true); 301 tree_view->setSortingEnabled(true);
216 tree_view->setEditTriggers(QHeaderView::NoEditTriggers); 302 tree_view->setEditTriggers(QHeaderView::NoEditTriggers);
217 tree_view->setUniformRowHeights(true);
218 tree_view->setContextMenuPolicy(Qt::CustomContextMenu); 303 tree_view->setContextMenuPolicy(Qt::CustomContextMenu);
219 tree_view->setStyleSheet(QStringLiteral("QTreeView{ border: none; }")); 304 tree_view->setStyleSheet(QStringLiteral("QTreeView{ border: none; }"));
220 305
@@ -230,12 +315,16 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvide
230 item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type")); 315 item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type"));
231 item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size")); 316 item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size"));
232 } 317 }
318 item_model->setSortRole(GameListItemPath::TitleRole);
233 319
320 connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons);
234 connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry); 321 connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry);
235 connect(tree_view, &QTreeView::customContextMenuRequested, this, &GameList::PopupContextMenu); 322 connect(tree_view, &QTreeView::customContextMenuRequested, this, &GameList::PopupContextMenu);
323 connect(tree_view, &QTreeView::expanded, this, &GameList::onItemExpanded);
324 connect(tree_view, &QTreeView::collapsed, this, &GameList::onItemExpanded);
236 325
237 // We must register all custom types with the Qt Automoc system so that we are able to use it 326 // We must register all custom types with the Qt Automoc system so that we are able to use
238 // with signals/slots. In this case, QList falls under the umbrells of custom types. 327 // it with signals/slots. In this case, QList falls under the umbrells of custom types.
239 qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>"); 328 qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>");
240 329
241 layout->setContentsMargins(0, 0, 0, 0); 330 layout->setContentsMargins(0, 0, 0, 0);
@@ -263,38 +352,68 @@ void GameList::clearFilter() {
263 search_field->clear(); 352 search_field->clear();
264} 353}
265 354
266void GameList::AddEntry(const QList<QStandardItem*>& entry_items) { 355void GameList::AddDirEntry(GameListDir* entry_items) {
267 item_model->invisibleRootItem()->appendRow(entry_items); 356 item_model->invisibleRootItem()->appendRow(entry_items);
357 tree_view->setExpanded(
358 entry_items->index(),
359 entry_items->data(GameListDir::GameDirRole).value<UISettings::GameDir*>()->expanded);
268} 360}
269 361
270void GameList::ValidateEntry(const QModelIndex& item) { 362void GameList::AddEntry(const QList<QStandardItem*>& entry_items, GameListDir* parent) {
271 // We don't care about the individual QStandardItem that was selected, but its row. 363 parent->appendRow(entry_items);
272 const int row = item_model->itemFromIndex(item)->row(); 364}
273 const QStandardItem* child_file = item_model->invisibleRootItem()->child(row, COLUMN_NAME);
274 const QString file_path = child_file->data(GameListItemPath::FullPathRole).toString();
275
276 if (file_path.isEmpty())
277 return;
278
279 if (!QFileInfo::exists(file_path))
280 return;
281 365
282 const QFileInfo file_info{file_path}; 366void GameList::ValidateEntry(const QModelIndex& item) {
283 if (file_info.isDir()) { 367 const auto selected = item.sibling(item.row(), 0);
284 const QDir dir{file_path}; 368
285 const QStringList matching_main = dir.entryList({QStringLiteral("main")}, QDir::Files); 369 switch (selected.data(GameListItem::TypeRole).value<GameListItemType>()) {
286 if (matching_main.size() == 1) { 370 case GameListItemType::Game: {
287 emit GameChosen(dir.path() + QDir::separator() + matching_main[0]); 371 const QString file_path = selected.data(GameListItemPath::FullPathRole).toString();
372 if (file_path.isEmpty())
373 return;
374 const QFileInfo file_info(file_path);
375 if (!file_info.exists())
376 return;
377
378 if (file_info.isDir()) {
379 const QDir dir{file_path};
380 const QStringList matching_main = dir.entryList({QStringLiteral("main")}, QDir::Files);
381 if (matching_main.size() == 1) {
382 emit GameChosen(dir.path() + QDir::separator() + matching_main[0]);
383 }
384 return;
288 } 385 }
289 return; 386
387 // Users usually want to run a different game after closing one
388 search_field->clear();
389 emit GameChosen(file_path);
390 break;
290 } 391 }
392 case GameListItemType::AddDir:
393 emit AddDirectory();
394 break;
395 }
396}
291 397
292 // Users usually want to run a diffrent game after closing one 398bool GameList::isEmpty() const {
293 search_field->clear(); 399 for (int i = 0; i < item_model->rowCount(); i++) {
294 emit GameChosen(file_path); 400 const QStandardItem* child = item_model->invisibleRootItem()->child(i);
401 const auto type = static_cast<GameListItemType>(child->type());
402 if (!child->hasChildren() &&
403 (type == GameListItemType::SdmcDir || type == GameListItemType::UserNandDir ||
404 type == GameListItemType::SysNandDir)) {
405 item_model->invisibleRootItem()->removeRow(child->row());
406 i--;
407 };
408 }
409 return !item_model->invisibleRootItem()->hasChildren();
295} 410}
296 411
297void GameList::DonePopulating(QStringList watch_list) { 412void GameList::DonePopulating(QStringList watch_list) {
413 emit ShowList(!isEmpty());
414
415 item_model->invisibleRootItem()->appendRow(new GameListAddDir());
416
298 // Clear out the old directories to watch for changes and add the new ones 417 // Clear out the old directories to watch for changes and add the new ones
299 auto watch_dirs = watcher->directories(); 418 auto watch_dirs = watcher->directories();
300 if (!watch_dirs.isEmpty()) { 419 if (!watch_dirs.isEmpty()) {
@@ -311,9 +430,13 @@ void GameList::DonePopulating(QStringList watch_list) {
311 QCoreApplication::processEvents(); 430 QCoreApplication::processEvents();
312 } 431 }
313 tree_view->setEnabled(true); 432 tree_view->setEnabled(true);
314 int rowCount = tree_view->model()->rowCount(); 433 const int folder_count = tree_view->model()->rowCount();
315 search_field->setFilterResult(rowCount, rowCount); 434 int children_total = 0;
316 if (rowCount > 0) { 435 for (int i = 0; i < folder_count; ++i) {
436 children_total += item_model->item(i, 0)->rowCount();
437 }
438 search_field->setFilterResult(children_total, children_total);
439 if (children_total > 0) {
317 search_field->setFocus(); 440 search_field->setFocus();
318 } 441 }
319} 442}
@@ -323,12 +446,27 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
323 if (!item.isValid()) 446 if (!item.isValid())
324 return; 447 return;
325 448
326 int row = item_model->itemFromIndex(item)->row(); 449 const auto selected = item.sibling(item.row(), 0);
327 QStandardItem* child_file = item_model->invisibleRootItem()->child(row, COLUMN_NAME);
328 u64 program_id = child_file->data(GameListItemPath::ProgramIdRole).toULongLong();
329 std::string path = child_file->data(GameListItemPath::FullPathRole).toString().toStdString();
330
331 QMenu context_menu; 450 QMenu context_menu;
451 switch (selected.data(GameListItem::TypeRole).value<GameListItemType>()) {
452 case GameListItemType::Game:
453 AddGamePopup(context_menu, selected.data(GameListItemPath::ProgramIdRole).toULongLong(),
454 selected.data(GameListItemPath::FullPathRole).toString().toStdString());
455 break;
456 case GameListItemType::CustomDir:
457 AddPermDirPopup(context_menu, selected);
458 AddCustomDirPopup(context_menu, selected);
459 break;
460 case GameListItemType::SdmcDir:
461 case GameListItemType::UserNandDir:
462 case GameListItemType::SysNandDir:
463 AddPermDirPopup(context_menu, selected);
464 break;
465 }
466 context_menu.exec(tree_view->viewport()->mapToGlobal(menu_location));
467}
468
469void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, std::string path) {
332 QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location")); 470 QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location"));
333 QAction* open_lfs_location = context_menu.addAction(tr("Open Mod Data Location")); 471 QAction* open_lfs_location = context_menu.addAction(tr("Open Mod Data Location"));
334 QAction* open_transferable_shader_cache = 472 QAction* open_transferable_shader_cache =
@@ -344,19 +482,86 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
344 auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); 482 auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id);
345 navigate_to_gamedb_entry->setVisible(it != compatibility_list.end() && program_id != 0); 483 navigate_to_gamedb_entry->setVisible(it != compatibility_list.end() && program_id != 0);
346 484
347 connect(open_save_location, &QAction::triggered, 485 connect(open_save_location, &QAction::triggered, [this, program_id]() {
348 [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData); }); 486 emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData);
349 connect(open_lfs_location, &QAction::triggered, 487 });
350 [&]() { emit OpenFolderRequested(program_id, GameListOpenTarget::ModData); }); 488 connect(open_lfs_location, &QAction::triggered, [this, program_id]() {
489 emit OpenFolderRequested(program_id, GameListOpenTarget::ModData);
490 });
351 connect(open_transferable_shader_cache, &QAction::triggered, 491 connect(open_transferable_shader_cache, &QAction::triggered,
352 [&]() { emit OpenTransferableShaderCacheRequested(program_id); }); 492 [this, program_id]() { emit OpenTransferableShaderCacheRequested(program_id); });
353 connect(dump_romfs, &QAction::triggered, [&]() { emit DumpRomFSRequested(program_id, path); }); 493 connect(dump_romfs, &QAction::triggered,
354 connect(copy_tid, &QAction::triggered, [&]() { emit CopyTIDRequested(program_id); }); 494 [this, program_id, path]() { emit DumpRomFSRequested(program_id, path); });
355 connect(navigate_to_gamedb_entry, &QAction::triggered, 495 connect(copy_tid, &QAction::triggered,
356 [&]() { emit NavigateToGamedbEntryRequested(program_id, compatibility_list); }); 496 [this, program_id]() { emit CopyTIDRequested(program_id); });
357 connect(properties, &QAction::triggered, [&]() { emit OpenPerGameGeneralRequested(path); }); 497 connect(navigate_to_gamedb_entry, &QAction::triggered, [this, program_id]() {
498 emit NavigateToGamedbEntryRequested(program_id, compatibility_list);
499 });
500 connect(properties, &QAction::triggered,
501 [this, path]() { emit OpenPerGameGeneralRequested(path); });
502};
503
504void GameList::AddCustomDirPopup(QMenu& context_menu, QModelIndex selected) {
505 UISettings::GameDir& game_dir =
506 *selected.data(GameListDir::GameDirRole).value<UISettings::GameDir*>();
507
508 QAction* deep_scan = context_menu.addAction(tr("Scan Subfolders"));
509 QAction* delete_dir = context_menu.addAction(tr("Remove Game Directory"));
510
511 deep_scan->setCheckable(true);
512 deep_scan->setChecked(game_dir.deep_scan);
513
514 connect(deep_scan, &QAction::triggered, [this, &game_dir] {
515 game_dir.deep_scan = !game_dir.deep_scan;
516 PopulateAsync(UISettings::values.game_dirs);
517 });
518 connect(delete_dir, &QAction::triggered, [this, &game_dir, selected] {
519 UISettings::values.game_dirs.removeOne(game_dir);
520 item_model->invisibleRootItem()->removeRow(selected.row());
521 });
522}
358 523
359 context_menu.exec(tree_view->viewport()->mapToGlobal(menu_location)); 524void GameList::AddPermDirPopup(QMenu& context_menu, QModelIndex selected) {
525 UISettings::GameDir& game_dir =
526 *selected.data(GameListDir::GameDirRole).value<UISettings::GameDir*>();
527
528 QAction* move_up = context_menu.addAction(tr(u8"\U000025b2 Move Up"));
529 QAction* move_down = context_menu.addAction(tr(u8"\U000025bc Move Down "));
530 QAction* open_directory_location = context_menu.addAction(tr("Open Directory Location"));
531
532 const int row = selected.row();
533
534 move_up->setEnabled(row > 0);
535 move_down->setEnabled(row < item_model->rowCount() - 2);
536
537 connect(move_up, &QAction::triggered, [this, selected, row, &game_dir] {
538 // find the indices of the items in settings and swap them
539 std::swap(UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf(game_dir)],
540 UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf(
541 *selected.sibling(row - 1, 0)
542 .data(GameListDir::GameDirRole)
543 .value<UISettings::GameDir*>())]);
544 // move the treeview items
545 QList<QStandardItem*> item = item_model->takeRow(row);
546 item_model->invisibleRootItem()->insertRow(row - 1, item);
547 tree_view->setExpanded(selected, game_dir.expanded);
548 });
549
550 connect(move_down, &QAction::triggered, [this, selected, row, &game_dir] {
551 // find the indices of the items in settings and swap them
552 std::swap(UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf(game_dir)],
553 UISettings::values.game_dirs[UISettings::values.game_dirs.indexOf(
554 *selected.sibling(row + 1, 0)
555 .data(GameListDir::GameDirRole)
556 .value<UISettings::GameDir*>())]);
557 // move the treeview items
558 const QList<QStandardItem*> item = item_model->takeRow(row);
559 item_model->invisibleRootItem()->insertRow(row + 1, item);
560 tree_view->setExpanded(selected, game_dir.expanded);
561 });
562
563 connect(open_directory_location, &QAction::triggered,
564 [this, game_dir] { emit OpenDirectory(game_dir.path); });
360} 565}
361 566
362void GameList::LoadCompatibilityList() { 567void GameList::LoadCompatibilityList() {
@@ -403,14 +608,7 @@ void GameList::LoadCompatibilityList() {
403 } 608 }
404} 609}
405 610
406void GameList::PopulateAsync(const QString& dir_path, bool deep_scan) { 611void GameList::PopulateAsync(QVector<UISettings::GameDir>& game_dirs) {
407 const QFileInfo dir_info{dir_path};
408 if (!dir_info.exists() || !dir_info.isDir()) {
409 LOG_ERROR(Frontend, "Could not find game list folder at {}", dir_path.toStdString());
410 search_field->setFilterResult(0, 0);
411 return;
412 }
413
414 tree_view->setEnabled(false); 612 tree_view->setEnabled(false);
415 613
416 // Update the columns in case UISettings has changed 614 // Update the columns in case UISettings has changed
@@ -433,17 +631,19 @@ void GameList::PopulateAsync(const QString& dir_path, bool deep_scan) {
433 631
434 // Delete any rows that might already exist if we're repopulating 632 // Delete any rows that might already exist if we're repopulating
435 item_model->removeRows(0, item_model->rowCount()); 633 item_model->removeRows(0, item_model->rowCount());
634 search_field->clear();
436 635
437 emit ShouldCancelWorker(); 636 emit ShouldCancelWorker();
438 637
439 GameListWorker* worker = 638 GameListWorker* worker = new GameListWorker(vfs, provider, game_dirs, compatibility_list);
440 new GameListWorker(vfs, provider, dir_path, deep_scan, compatibility_list);
441 639
442 connect(worker, &GameListWorker::EntryReady, this, &GameList::AddEntry, Qt::QueuedConnection); 640 connect(worker, &GameListWorker::EntryReady, this, &GameList::AddEntry, Qt::QueuedConnection);
641 connect(worker, &GameListWorker::DirEntryReady, this, &GameList::AddDirEntry,
642 Qt::QueuedConnection);
443 connect(worker, &GameListWorker::Finished, this, &GameList::DonePopulating, 643 connect(worker, &GameListWorker::Finished, this, &GameList::DonePopulating,
444 Qt::QueuedConnection); 644 Qt::QueuedConnection);
445 // Use DirectConnection here because worker->Cancel() is thread-safe and we want it to cancel 645 // Use DirectConnection here because worker->Cancel() is thread-safe and we want it to
446 // without delay. 646 // cancel without delay.
447 connect(this, &GameList::ShouldCancelWorker, worker, &GameListWorker::Cancel, 647 connect(this, &GameList::ShouldCancelWorker, worker, &GameListWorker::Cancel,
448 Qt::DirectConnection); 648 Qt::DirectConnection);
449 649
@@ -471,10 +671,40 @@ const QStringList GameList::supported_file_extensions = {
471 QStringLiteral("xci"), QStringLiteral("nsp"), QStringLiteral("kip")}; 671 QStringLiteral("xci"), QStringLiteral("nsp"), QStringLiteral("kip")};
472 672
473void GameList::RefreshGameDirectory() { 673void GameList::RefreshGameDirectory() {
474 if (!UISettings::values.game_directory_path.isEmpty() && current_worker != nullptr) { 674 if (!UISettings::values.game_dirs.isEmpty() && current_worker != nullptr) {
475 LOG_INFO(Frontend, "Change detected in the games directory. Reloading game list."); 675 LOG_INFO(Frontend, "Change detected in the games directory. Reloading game list.");
476 search_field->clear(); 676 PopulateAsync(UISettings::values.game_dirs);
477 PopulateAsync(UISettings::values.game_directory_path,
478 UISettings::values.game_directory_deepscan);
479 } 677 }
480} 678}
679
680GameListPlaceholder::GameListPlaceholder(GMainWindow* parent) : QWidget{parent} {
681 connect(parent, &GMainWindow::UpdateThemedIcons, this,
682 &GameListPlaceholder::onUpdateThemedIcons);
683
684 layout = new QVBoxLayout;
685 image = new QLabel;
686 text = new QLabel;
687 layout->setAlignment(Qt::AlignCenter);
688 image->setPixmap(QIcon::fromTheme(QStringLiteral("plus_folder")).pixmap(200));
689
690 text->setText(tr("Double-click to add a new folder to the game list"));
691 QFont font = text->font();
692 font.setPointSize(20);
693 text->setFont(font);
694 text->setAlignment(Qt::AlignHCenter);
695 image->setAlignment(Qt::AlignHCenter);
696
697 layout->addWidget(image);
698 layout->addWidget(text);
699 setLayout(layout);
700}
701
702GameListPlaceholder::~GameListPlaceholder() = default;
703
704void GameListPlaceholder::onUpdateThemedIcons() {
705 image->setPixmap(QIcon::fromTheme(QStringLiteral("plus_folder")).pixmap(200));
706}
707
708void GameListPlaceholder::mouseDoubleClickEvent(QMouseEvent* event) {
709 emit GameListPlaceholder::AddDirectory();
710}
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index f8f8bd6c5..878d94413 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -8,6 +8,7 @@
8#include <QHBoxLayout> 8#include <QHBoxLayout>
9#include <QLabel> 9#include <QLabel>
10#include <QLineEdit> 10#include <QLineEdit>
11#include <QList>
11#include <QModelIndex> 12#include <QModelIndex>
12#include <QSettings> 13#include <QSettings>
13#include <QStandardItem> 14#include <QStandardItem>
@@ -16,13 +17,16 @@
16#include <QToolButton> 17#include <QToolButton>
17#include <QTreeView> 18#include <QTreeView>
18#include <QVBoxLayout> 19#include <QVBoxLayout>
20#include <QVector>
19#include <QWidget> 21#include <QWidget>
20 22
21#include "common/common_types.h" 23#include "common/common_types.h"
24#include "uisettings.h"
22#include "yuzu/compatibility_list.h" 25#include "yuzu/compatibility_list.h"
23 26
24class GameListWorker; 27class GameListWorker;
25class GameListSearchField; 28class GameListSearchField;
29class GameListDir;
26class GMainWindow; 30class GMainWindow;
27 31
28namespace FileSys { 32namespace FileSys {
@@ -52,12 +56,14 @@ public:
52 FileSys::ManualContentProvider* provider, GMainWindow* parent = nullptr); 56 FileSys::ManualContentProvider* provider, GMainWindow* parent = nullptr);
53 ~GameList() override; 57 ~GameList() override;
54 58
59 QString getLastFilterResultItem() const;
55 void clearFilter(); 60 void clearFilter();
56 void setFilterFocus(); 61 void setFilterFocus();
57 void setFilterVisible(bool visibility); 62 void setFilterVisible(bool visibility);
63 bool isEmpty() const;
58 64
59 void LoadCompatibilityList(); 65 void LoadCompatibilityList();
60 void PopulateAsync(const QString& dir_path, bool deep_scan); 66 void PopulateAsync(QVector<UISettings::GameDir>& game_dirs);
61 67
62 void SaveInterfaceLayout(); 68 void SaveInterfaceLayout();
63 void LoadInterfaceLayout(); 69 void LoadInterfaceLayout();
@@ -74,19 +80,29 @@ signals:
74 void NavigateToGamedbEntryRequested(u64 program_id, 80 void NavigateToGamedbEntryRequested(u64 program_id,
75 const CompatibilityList& compatibility_list); 81 const CompatibilityList& compatibility_list);
76 void OpenPerGameGeneralRequested(const std::string& file); 82 void OpenPerGameGeneralRequested(const std::string& file);
83 void OpenDirectory(const QString& directory);
84 void AddDirectory();
85 void ShowList(bool show);
77 86
78private slots: 87private slots:
88 void onItemExpanded(const QModelIndex& item);
79 void onTextChanged(const QString& new_text); 89 void onTextChanged(const QString& new_text);
80 void onFilterCloseClicked(); 90 void onFilterCloseClicked();
91 void onUpdateThemedIcons();
81 92
82private: 93private:
83 void AddEntry(const QList<QStandardItem*>& entry_items); 94 void AddDirEntry(GameListDir* entry_items);
95 void AddEntry(const QList<QStandardItem*>& entry_items, GameListDir* parent);
84 void ValidateEntry(const QModelIndex& item); 96 void ValidateEntry(const QModelIndex& item);
85 void DonePopulating(QStringList watch_list); 97 void DonePopulating(QStringList watch_list);
86 98
87 void PopupContextMenu(const QPoint& menu_location);
88 void RefreshGameDirectory(); 99 void RefreshGameDirectory();
89 100
101 void PopupContextMenu(const QPoint& menu_location);
102 void AddGamePopup(QMenu& context_menu, u64 program_id, std::string path);
103 void AddCustomDirPopup(QMenu& context_menu, QModelIndex selected);
104 void AddPermDirPopup(QMenu& context_menu, QModelIndex selected);
105
90 std::shared_ptr<FileSys::VfsFilesystem> vfs; 106 std::shared_ptr<FileSys::VfsFilesystem> vfs;
91 FileSys::ManualContentProvider* provider; 107 FileSys::ManualContentProvider* provider;
92 GameListSearchField* search_field; 108 GameListSearchField* search_field;
@@ -102,3 +118,24 @@ private:
102}; 118};
103 119
104Q_DECLARE_METATYPE(GameListOpenTarget); 120Q_DECLARE_METATYPE(GameListOpenTarget);
121
122class GameListPlaceholder : public QWidget {
123 Q_OBJECT
124public:
125 explicit GameListPlaceholder(GMainWindow* parent = nullptr);
126 ~GameListPlaceholder();
127
128signals:
129 void AddDirectory();
130
131private slots:
132 void onUpdateThemedIcons();
133
134protected:
135 void mouseDoubleClickEvent(QMouseEvent* event) override;
136
137private:
138 QVBoxLayout* layout = nullptr;
139 QLabel* image = nullptr;
140 QLabel* text = nullptr;
141};
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index 0b458ef48..a8d888fee 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -10,6 +10,7 @@
10#include <utility> 10#include <utility>
11 11
12#include <QCoreApplication> 12#include <QCoreApplication>
13#include <QFileInfo>
13#include <QImage> 14#include <QImage>
14#include <QObject> 15#include <QObject>
15#include <QStandardItem> 16#include <QStandardItem>
@@ -19,9 +20,20 @@
19#include "common/common_types.h" 20#include "common/common_types.h"
20#include "common/logging/log.h" 21#include "common/logging/log.h"
21#include "common/string_util.h" 22#include "common/string_util.h"
22#include "yuzu/ui_settings.h" 23#include "yuzu/uisettings.h"
23#include "yuzu/util/util.h" 24#include "yuzu/util/util.h"
24 25
26enum class GameListItemType {
27 Game = QStandardItem::UserType + 1,
28 CustomDir = QStandardItem::UserType + 2,
29 SdmcDir = QStandardItem::UserType + 3,
30 UserNandDir = QStandardItem::UserType + 4,
31 SysNandDir = QStandardItem::UserType + 5,
32 AddDir = QStandardItem::UserType + 6
33};
34
35Q_DECLARE_METATYPE(GameListItemType);
36
25/** 37/**
26 * Gets the default icon (for games without valid title metadata) 38 * Gets the default icon (for games without valid title metadata)
27 * @param size The desired width and height of the default icon. 39 * @param size The desired width and height of the default icon.
@@ -36,8 +48,13 @@ static QPixmap GetDefaultIcon(u32 size) {
36class GameListItem : public QStandardItem { 48class GameListItem : public QStandardItem {
37 49
38public: 50public:
51 // used to access type from item index
52 static const int TypeRole = Qt::UserRole + 1;
53 static const int SortRole = Qt::UserRole + 2;
39 GameListItem() = default; 54 GameListItem() = default;
40 explicit GameListItem(const QString& string) : QStandardItem(string) {} 55 GameListItem(const QString& string) : QStandardItem(string) {
56 setData(string, SortRole);
57 }
41}; 58};
42 59
43/** 60/**
@@ -48,14 +65,15 @@ public:
48 */ 65 */
49class GameListItemPath : public GameListItem { 66class GameListItemPath : public GameListItem {
50public: 67public:
51 static const int FullPathRole = Qt::UserRole + 1; 68 static const int TitleRole = SortRole;
52 static const int TitleRole = Qt::UserRole + 2; 69 static const int FullPathRole = SortRole + 1;
53 static const int ProgramIdRole = Qt::UserRole + 3; 70 static const int ProgramIdRole = SortRole + 2;
54 static const int FileTypeRole = Qt::UserRole + 4; 71 static const int FileTypeRole = SortRole + 3;
55 72
56 GameListItemPath() = default; 73 GameListItemPath() = default;
57 GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data, 74 GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data,
58 const QString& game_name, const QString& game_type, u64 program_id) { 75 const QString& game_name, const QString& game_type, u64 program_id) {
76 setData(type(), TypeRole);
59 setData(game_path, FullPathRole); 77 setData(game_path, FullPathRole);
60 setData(game_name, TitleRole); 78 setData(game_name, TitleRole);
61 setData(qulonglong(program_id), ProgramIdRole); 79 setData(qulonglong(program_id), ProgramIdRole);
@@ -72,6 +90,10 @@ public:
72 setData(picture, Qt::DecorationRole); 90 setData(picture, Qt::DecorationRole);
73 } 91 }
74 92
93 int type() const override {
94 return static_cast<int>(GameListItemType::Game);
95 }
96
75 QVariant data(int role) const override { 97 QVariant data(int role) const override {
76 if (role == Qt::DisplayRole) { 98 if (role == Qt::DisplayRole) {
77 std::string filename; 99 std::string filename;
@@ -103,9 +125,11 @@ public:
103class GameListItemCompat : public GameListItem { 125class GameListItemCompat : public GameListItem {
104 Q_DECLARE_TR_FUNCTIONS(GameListItemCompat) 126 Q_DECLARE_TR_FUNCTIONS(GameListItemCompat)
105public: 127public:
106 static const int CompatNumberRole = Qt::UserRole + 1; 128 static const int CompatNumberRole = SortRole;
107 GameListItemCompat() = default; 129 GameListItemCompat() = default;
108 explicit GameListItemCompat(const QString& compatibility) { 130 explicit GameListItemCompat(const QString& compatibility) {
131 setData(type(), TypeRole);
132
109 struct CompatStatus { 133 struct CompatStatus {
110 QString color; 134 QString color;
111 const char* text; 135 const char* text;
@@ -135,6 +159,10 @@ public:
135 setData(CreateCirclePixmapFromColor(status.color), Qt::DecorationRole); 159 setData(CreateCirclePixmapFromColor(status.color), Qt::DecorationRole);
136 } 160 }
137 161
162 int type() const override {
163 return static_cast<int>(GameListItemType::Game);
164 }
165
138 bool operator<(const QStandardItem& other) const override { 166 bool operator<(const QStandardItem& other) const override {
139 return data(CompatNumberRole) < other.data(CompatNumberRole); 167 return data(CompatNumberRole) < other.data(CompatNumberRole);
140 } 168 }
@@ -146,12 +174,12 @@ public:
146 * human-readable string representation will be displayed to the user. 174 * human-readable string representation will be displayed to the user.
147 */ 175 */
148class GameListItemSize : public GameListItem { 176class GameListItemSize : public GameListItem {
149
150public: 177public:
151 static const int SizeRole = Qt::UserRole + 1; 178 static const int SizeRole = SortRole;
152 179
153 GameListItemSize() = default; 180 GameListItemSize() = default;
154 explicit GameListItemSize(const qulonglong size_bytes) { 181 explicit GameListItemSize(const qulonglong size_bytes) {
182 setData(type(), TypeRole);
155 setData(size_bytes, SizeRole); 183 setData(size_bytes, SizeRole);
156 } 184 }
157 185
@@ -167,6 +195,10 @@ public:
167 } 195 }
168 } 196 }
169 197
198 int type() const override {
199 return static_cast<int>(GameListItemType::Game);
200 }
201
170 /** 202 /**
171 * This operator is, in practice, only used by the TreeView sorting systems. 203 * This operator is, in practice, only used by the TreeView sorting systems.
172 * Override it so that it will correctly sort by numerical value instead of by string 204 * Override it so that it will correctly sort by numerical value instead of by string
@@ -177,6 +209,82 @@ public:
177 } 209 }
178}; 210};
179 211
212class GameListDir : public GameListItem {
213public:
214 static const int GameDirRole = Qt::UserRole + 2;
215
216 explicit GameListDir(UISettings::GameDir& directory,
217 GameListItemType dir_type = GameListItemType::CustomDir)
218 : dir_type{dir_type} {
219 setData(type(), TypeRole);
220
221 UISettings::GameDir* game_dir = &directory;
222 setData(QVariant::fromValue(game_dir), GameDirRole);
223
224 const int icon_size = std::min(static_cast<int>(UISettings::values.icon_size), 64);
225 switch (dir_type) {
226 case GameListItemType::SdmcDir:
227 setData(
228 QIcon::fromTheme(QStringLiteral("sd_card"))
229 .pixmap(icon_size)
230 .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
231 Qt::DecorationRole);
232 setData(QObject::tr("Installed SD Titles"), Qt::DisplayRole);
233 break;
234 case GameListItemType::UserNandDir:
235 setData(
236 QIcon::fromTheme(QStringLiteral("chip"))
237 .pixmap(icon_size)
238 .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
239 Qt::DecorationRole);
240 setData(QObject::tr("Installed NAND Titles"), Qt::DisplayRole);
241 break;
242 case GameListItemType::SysNandDir:
243 setData(
244 QIcon::fromTheme(QStringLiteral("chip"))
245 .pixmap(icon_size)
246 .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
247 Qt::DecorationRole);
248 setData(QObject::tr("System Titles"), Qt::DisplayRole);
249 break;
250 case GameListItemType::CustomDir:
251 const QString icon_name = QFileInfo::exists(game_dir->path)
252 ? QStringLiteral("folder")
253 : QStringLiteral("bad_folder");
254 setData(QIcon::fromTheme(icon_name).pixmap(icon_size).scaled(
255 icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
256 Qt::DecorationRole);
257 setData(game_dir->path, Qt::DisplayRole);
258 break;
259 };
260 };
261
262 int type() const override {
263 return static_cast<int>(dir_type);
264 }
265
266private:
267 GameListItemType dir_type;
268};
269
270class GameListAddDir : public GameListItem {
271public:
272 explicit GameListAddDir() {
273 setData(type(), TypeRole);
274
275 const int icon_size = std::min(static_cast<int>(UISettings::values.icon_size), 64);
276 setData(QIcon::fromTheme(QStringLiteral("plus"))
277 .pixmap(icon_size)
278 .scaled(icon_size, icon_size, Qt::IgnoreAspectRatio, Qt::SmoothTransformation),
279 Qt::DecorationRole);
280 setData(QObject::tr("Add New Game Directory"), Qt::DisplayRole);
281 }
282
283 int type() const override {
284 return static_cast<int>(GameListItemType::AddDir);
285 }
286};
287
180class GameList; 288class GameList;
181class QHBoxLayout; 289class QHBoxLayout;
182class QTreeView; 290class QTreeView;
@@ -208,6 +316,9 @@ private:
208 // EventFilter in order to process systemkeys while editing the searchfield 316 // EventFilter in order to process systemkeys while editing the searchfield
209 bool eventFilter(QObject* obj, QEvent* event) override; 317 bool eventFilter(QObject* obj, QEvent* event) override;
210 }; 318 };
319 int visible;
320 int total;
321
211 QHBoxLayout* layout_filter = nullptr; 322 QHBoxLayout* layout_filter = nullptr;
212 QTreeView* tree_view = nullptr; 323 QTreeView* tree_view = nullptr;
213 QLabel* label_filter = nullptr; 324 QLabel* label_filter = nullptr;
diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp
index 4f30e9147..fd21a9761 100644
--- a/src/yuzu/game_list_worker.cpp
+++ b/src/yuzu/game_list_worker.cpp
@@ -29,7 +29,7 @@
29#include "yuzu/game_list.h" 29#include "yuzu/game_list.h"
30#include "yuzu/game_list_p.h" 30#include "yuzu/game_list_p.h"
31#include "yuzu/game_list_worker.h" 31#include "yuzu/game_list_worker.h"
32#include "yuzu/ui_settings.h" 32#include "yuzu/uisettings.h"
33 33
34namespace { 34namespace {
35 35
@@ -223,21 +223,37 @@ QList<QStandardItem*> MakeGameListEntry(const std::string& path, const std::stri
223} // Anonymous namespace 223} // Anonymous namespace
224 224
225GameListWorker::GameListWorker(FileSys::VirtualFilesystem vfs, 225GameListWorker::GameListWorker(FileSys::VirtualFilesystem vfs,
226 FileSys::ManualContentProvider* provider, QString dir_path, 226 FileSys::ManualContentProvider* provider,
227 bool deep_scan, const CompatibilityList& compatibility_list) 227 QVector<UISettings::GameDir>& game_dirs,
228 : vfs(std::move(vfs)), provider(provider), dir_path(std::move(dir_path)), deep_scan(deep_scan), 228 const CompatibilityList& compatibility_list)
229 : vfs(std::move(vfs)), provider(provider), game_dirs(game_dirs),
229 compatibility_list(compatibility_list) {} 230 compatibility_list(compatibility_list) {}
230 231
231GameListWorker::~GameListWorker() = default; 232GameListWorker::~GameListWorker() = default;
232 233
233void GameListWorker::AddTitlesToGameList() { 234void GameListWorker::AddTitlesToGameList(GameListDir* parent_dir) {
234 const auto& cache = dynamic_cast<FileSys::ContentProviderUnion&>( 235 using namespace FileSys;
235 Core::System::GetInstance().GetContentProvider()); 236
236 const auto installed_games = cache.ListEntriesFilterOrigin( 237 const auto& cache =
237 std::nullopt, FileSys::TitleType::Application, FileSys::ContentRecordType::Program); 238 dynamic_cast<ContentProviderUnion&>(Core::System::GetInstance().GetContentProvider());
239
240 std::vector<std::pair<ContentProviderUnionSlot, ContentProviderEntry>> installed_games;
241 installed_games = cache.ListEntriesFilterOrigin(std::nullopt, TitleType::Application,
242 ContentRecordType::Program);
243
244 if (parent_dir->type() == static_cast<int>(GameListItemType::SdmcDir)) {
245 installed_games = cache.ListEntriesFilterOrigin(
246 ContentProviderUnionSlot::SDMC, TitleType::Application, ContentRecordType::Program);
247 } else if (parent_dir->type() == static_cast<int>(GameListItemType::UserNandDir)) {
248 installed_games = cache.ListEntriesFilterOrigin(
249 ContentProviderUnionSlot::UserNAND, TitleType::Application, ContentRecordType::Program);
250 } else if (parent_dir->type() == static_cast<int>(GameListItemType::SysNandDir)) {
251 installed_games = cache.ListEntriesFilterOrigin(
252 ContentProviderUnionSlot::SysNAND, TitleType::Application, ContentRecordType::Program);
253 }
238 254
239 for (const auto& [slot, game] : installed_games) { 255 for (const auto& [slot, game] : installed_games) {
240 if (slot == FileSys::ContentProviderUnionSlot::FrontendManual) 256 if (slot == ContentProviderUnionSlot::FrontendManual)
241 continue; 257 continue;
242 258
243 const auto file = cache.GetEntryUnparsed(game.title_id, game.type); 259 const auto file = cache.GetEntryUnparsed(game.title_id, game.type);
@@ -250,21 +266,22 @@ void GameListWorker::AddTitlesToGameList() {
250 u64 program_id = 0; 266 u64 program_id = 0;
251 loader->ReadProgramId(program_id); 267 loader->ReadProgramId(program_id);
252 268
253 const FileSys::PatchManager patch{program_id}; 269 const PatchManager patch{program_id};
254 const auto control = cache.GetEntry(game.title_id, FileSys::ContentRecordType::Control); 270 const auto control = cache.GetEntry(game.title_id, ContentRecordType::Control);
255 if (control != nullptr) 271 if (control != nullptr)
256 GetMetadataFromControlNCA(patch, *control, icon, name); 272 GetMetadataFromControlNCA(patch, *control, icon, name);
257 273
258 emit EntryReady(MakeGameListEntry(file->GetFullPath(), name, icon, *loader, program_id, 274 emit EntryReady(MakeGameListEntry(file->GetFullPath(), name, icon, *loader, program_id,
259 compatibility_list, patch)); 275 compatibility_list, patch),
276 parent_dir);
260 } 277 }
261} 278}
262 279
263void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_path, 280void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_path,
264 unsigned int recursion) { 281 unsigned int recursion, GameListDir* parent_dir) {
265 const auto callback = [this, target, recursion](u64* num_entries_out, 282 const auto callback = [this, target, recursion,
266 const std::string& directory, 283 parent_dir](u64* num_entries_out, const std::string& directory,
267 const std::string& virtual_name) -> bool { 284 const std::string& virtual_name) -> bool {
268 if (stop_processing) { 285 if (stop_processing) {
269 // Breaks the callback loop. 286 // Breaks the callback loop.
270 return false; 287 return false;
@@ -317,11 +334,12 @@ void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_pa
317 const FileSys::PatchManager patch{program_id}; 334 const FileSys::PatchManager patch{program_id};
318 335
319 emit EntryReady(MakeGameListEntry(physical_name, name, icon, *loader, program_id, 336 emit EntryReady(MakeGameListEntry(physical_name, name, icon, *loader, program_id,
320 compatibility_list, patch)); 337 compatibility_list, patch),
338 parent_dir);
321 } 339 }
322 } else if (is_dir && recursion > 0) { 340 } else if (is_dir && recursion > 0) {
323 watch_list.append(QString::fromStdString(physical_name)); 341 watch_list.append(QString::fromStdString(physical_name));
324 ScanFileSystem(target, physical_name, recursion - 1); 342 ScanFileSystem(target, physical_name, recursion - 1, parent_dir);
325 } 343 }
326 344
327 return true; 345 return true;
@@ -332,12 +350,32 @@ void GameListWorker::ScanFileSystem(ScanTarget target, const std::string& dir_pa
332 350
333void GameListWorker::run() { 351void GameListWorker::run() {
334 stop_processing = false; 352 stop_processing = false;
335 watch_list.append(dir_path); 353
336 provider->ClearAllEntries(); 354 for (UISettings::GameDir& game_dir : game_dirs) {
337 ScanFileSystem(ScanTarget::FillManualContentProvider, dir_path.toStdString(), 355 if (game_dir.path == QStringLiteral("SDMC")) {
338 deep_scan ? 256 : 0); 356 auto* const game_list_dir = new GameListDir(game_dir, GameListItemType::SdmcDir);
339 AddTitlesToGameList(); 357 emit DirEntryReady({game_list_dir});
340 ScanFileSystem(ScanTarget::PopulateGameList, dir_path.toStdString(), deep_scan ? 256 : 0); 358 AddTitlesToGameList(game_list_dir);
359 } else if (game_dir.path == QStringLiteral("UserNAND")) {
360 auto* const game_list_dir = new GameListDir(game_dir, GameListItemType::UserNandDir);
361 emit DirEntryReady({game_list_dir});
362 AddTitlesToGameList(game_list_dir);
363 } else if (game_dir.path == QStringLiteral("SysNAND")) {
364 auto* const game_list_dir = new GameListDir(game_dir, GameListItemType::SysNandDir);
365 emit DirEntryReady({game_list_dir});
366 AddTitlesToGameList(game_list_dir);
367 } else {
368 watch_list.append(game_dir.path);
369 auto* const game_list_dir = new GameListDir(game_dir);
370 emit DirEntryReady({game_list_dir});
371 provider->ClearAllEntries();
372 ScanFileSystem(ScanTarget::FillManualContentProvider, game_dir.path.toStdString(), 2,
373 game_list_dir);
374 ScanFileSystem(ScanTarget::PopulateGameList, game_dir.path.toStdString(),
375 game_dir.deep_scan ? 256 : 0, game_list_dir);
376 }
377 };
378
341 emit Finished(watch_list); 379 emit Finished(watch_list);
342} 380}
343 381
diff --git a/src/yuzu/game_list_worker.h b/src/yuzu/game_list_worker.h
index 7c3074af9..6e52fca89 100644
--- a/src/yuzu/game_list_worker.h
+++ b/src/yuzu/game_list_worker.h
@@ -14,6 +14,7 @@
14#include <QObject> 14#include <QObject>
15#include <QRunnable> 15#include <QRunnable>
16#include <QString> 16#include <QString>
17#include <QVector>
17 18
18#include "common/common_types.h" 19#include "common/common_types.h"
19#include "yuzu/compatibility_list.h" 20#include "yuzu/compatibility_list.h"
@@ -33,9 +34,10 @@ class GameListWorker : public QObject, public QRunnable {
33 Q_OBJECT 34 Q_OBJECT
34 35
35public: 36public:
36 GameListWorker(std::shared_ptr<FileSys::VfsFilesystem> vfs, 37 explicit GameListWorker(std::shared_ptr<FileSys::VfsFilesystem> vfs,
37 FileSys::ManualContentProvider* provider, QString dir_path, bool deep_scan, 38 FileSys::ManualContentProvider* provider,
38 const CompatibilityList& compatibility_list); 39 QVector<UISettings::GameDir>& game_dirs,
40 const CompatibilityList& compatibility_list);
39 ~GameListWorker() override; 41 ~GameListWorker() override;
40 42
41 /// Starts the processing of directory tree information. 43 /// Starts the processing of directory tree information.
@@ -48,31 +50,33 @@ signals:
48 /** 50 /**
49 * The `EntryReady` signal is emitted once an entry has been prepared and is ready 51 * The `EntryReady` signal is emitted once an entry has been prepared and is ready
50 * to be added to the game list. 52 * to be added to the game list.
51 * @param entry_items a list with `QStandardItem`s that make up the columns of the new entry. 53 * @param entry_items a list with `QStandardItem`s that make up the columns of the new
54 * entry.
52 */ 55 */
53 void EntryReady(QList<QStandardItem*> entry_items); 56 void DirEntryReady(GameListDir* entry_items);
57 void EntryReady(QList<QStandardItem*> entry_items, GameListDir* parent_dir);
54 58
55 /** 59 /**
56 * After the worker has traversed the game directory looking for entries, this signal is emitted 60 * After the worker has traversed the game directory looking for entries, this signal is
57 * with a list of folders that should be watched for changes as well. 61 * emitted with a list of folders that should be watched for changes as well.
58 */ 62 */
59 void Finished(QStringList watch_list); 63 void Finished(QStringList watch_list);
60 64
61private: 65private:
62 void AddTitlesToGameList(); 66 void AddTitlesToGameList(GameListDir* parent_dir);
63 67
64 enum class ScanTarget { 68 enum class ScanTarget {
65 FillManualContentProvider, 69 FillManualContentProvider,
66 PopulateGameList, 70 PopulateGameList,
67 }; 71 };
68 72
69 void ScanFileSystem(ScanTarget target, const std::string& dir_path, unsigned int recursion = 0); 73 void ScanFileSystem(ScanTarget target, const std::string& dir_path, unsigned int recursion,
74 GameListDir* parent_dir);
70 75
71 std::shared_ptr<FileSys::VfsFilesystem> vfs; 76 std::shared_ptr<FileSys::VfsFilesystem> vfs;
72 FileSys::ManualContentProvider* provider; 77 FileSys::ManualContentProvider* provider;
73 QStringList watch_list; 78 QStringList watch_list;
74 QString dir_path;
75 bool deep_scan;
76 const CompatibilityList& compatibility_list; 79 const CompatibilityList& compatibility_list;
80 QVector<UISettings::GameDir>& game_dirs;
77 std::atomic_bool stop_processing; 81 std::atomic_bool stop_processing;
78}; 82};
diff --git a/src/yuzu/hotkeys.cpp b/src/yuzu/hotkeys.cpp
index 4582e7f21..d4e97fa16 100644
--- a/src/yuzu/hotkeys.cpp
+++ b/src/yuzu/hotkeys.cpp
@@ -7,7 +7,7 @@
7#include <QTreeWidgetItem> 7#include <QTreeWidgetItem>
8#include <QtGlobal> 8#include <QtGlobal>
9#include "yuzu/hotkeys.h" 9#include "yuzu/hotkeys.h"
10#include "yuzu/ui_settings.h" 10#include "yuzu/uisettings.h"
11 11
12HotkeyRegistry::HotkeyRegistry() = default; 12HotkeyRegistry::HotkeyRegistry() = default;
13HotkeyRegistry::~HotkeyRegistry() = default; 13HotkeyRegistry::~HotkeyRegistry() = default;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 47e46f574..8304c6517 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -6,6 +6,9 @@
6#include <clocale> 6#include <clocale>
7#include <memory> 7#include <memory>
8#include <thread> 8#include <thread>
9#ifdef __APPLE__
10#include <unistd.h> // for chdir
11#endif
9 12
10// VFS includes must be before glad as they will conflict with Windows file api, which uses defines. 13// VFS includes must be before glad as they will conflict with Windows file api, which uses defines.
11#include "applets/error.h" 14#include "applets/error.h"
@@ -100,7 +103,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
100#include "yuzu/hotkeys.h" 103#include "yuzu/hotkeys.h"
101#include "yuzu/loading_screen.h" 104#include "yuzu/loading_screen.h"
102#include "yuzu/main.h" 105#include "yuzu/main.h"
103#include "yuzu/ui_settings.h" 106#include "yuzu/uisettings.h"
104 107
105#ifdef USE_DISCORD_PRESENCE 108#ifdef USE_DISCORD_PRESENCE
106#include "yuzu/discord_impl.h" 109#include "yuzu/discord_impl.h"
@@ -119,6 +122,7 @@ Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin);
119#endif 122#endif
120 123
121#ifdef _WIN32 124#ifdef _WIN32
125#include <windows.h>
122extern "C" { 126extern "C" {
123// tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable 127// tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable
124// graphics 128// graphics
@@ -215,8 +219,7 @@ GMainWindow::GMainWindow()
215 OnReinitializeKeys(ReinitializeKeyBehavior::NoWarning); 219 OnReinitializeKeys(ReinitializeKeyBehavior::NoWarning);
216 220
217 game_list->LoadCompatibilityList(); 221 game_list->LoadCompatibilityList();
218 game_list->PopulateAsync(UISettings::values.game_directory_path, 222 game_list->PopulateAsync(UISettings::values.game_dirs);
219 UISettings::values.game_directory_deepscan);
220 223
221 // Show one-time "callout" messages to the user 224 // Show one-time "callout" messages to the user
222 ShowTelemetryCallout(); 225 ShowTelemetryCallout();
@@ -426,6 +429,10 @@ void GMainWindow::InitializeWidgets() {
426 game_list = new GameList(vfs, provider.get(), this); 429 game_list = new GameList(vfs, provider.get(), this);
427 ui.horizontalLayout->addWidget(game_list); 430 ui.horizontalLayout->addWidget(game_list);
428 431
432 game_list_placeholder = new GameListPlaceholder(this);
433 ui.horizontalLayout->addWidget(game_list_placeholder);
434 game_list_placeholder->setVisible(false);
435
429 loading_screen = new LoadingScreen(this); 436 loading_screen = new LoadingScreen(this);
430 loading_screen->hide(); 437 loading_screen->hide();
431 ui.horizontalLayout->addWidget(loading_screen); 438 ui.horizontalLayout->addWidget(loading_screen);
@@ -659,6 +666,7 @@ void GMainWindow::RestoreUIState() {
659 666
660void GMainWindow::ConnectWidgetEvents() { 667void GMainWindow::ConnectWidgetEvents() {
661 connect(game_list, &GameList::GameChosen, this, &GMainWindow::OnGameListLoadFile); 668 connect(game_list, &GameList::GameChosen, this, &GMainWindow::OnGameListLoadFile);
669 connect(game_list, &GameList::OpenDirectory, this, &GMainWindow::OnGameListOpenDirectory);
662 connect(game_list, &GameList::OpenFolderRequested, this, &GMainWindow::OnGameListOpenFolder); 670 connect(game_list, &GameList::OpenFolderRequested, this, &GMainWindow::OnGameListOpenFolder);
663 connect(game_list, &GameList::OpenTransferableShaderCacheRequested, this, 671 connect(game_list, &GameList::OpenTransferableShaderCacheRequested, this,
664 &GMainWindow::OnTransferableShaderCacheOpenFile); 672 &GMainWindow::OnTransferableShaderCacheOpenFile);
@@ -666,6 +674,11 @@ void GMainWindow::ConnectWidgetEvents() {
666 connect(game_list, &GameList::CopyTIDRequested, this, &GMainWindow::OnGameListCopyTID); 674 connect(game_list, &GameList::CopyTIDRequested, this, &GMainWindow::OnGameListCopyTID);
667 connect(game_list, &GameList::NavigateToGamedbEntryRequested, this, 675 connect(game_list, &GameList::NavigateToGamedbEntryRequested, this,
668 &GMainWindow::OnGameListNavigateToGamedbEntry); 676 &GMainWindow::OnGameListNavigateToGamedbEntry);
677 connect(game_list, &GameList::AddDirectory, this, &GMainWindow::OnGameListAddDirectory);
678 connect(game_list_placeholder, &GameListPlaceholder::AddDirectory, this,
679 &GMainWindow::OnGameListAddDirectory);
680 connect(game_list, &GameList::ShowList, this, &GMainWindow::OnGameListShowList);
681
669 connect(game_list, &GameList::OpenPerGameGeneralRequested, this, 682 connect(game_list, &GameList::OpenPerGameGeneralRequested, this,
670 &GMainWindow::OnGameListOpenPerGameProperties); 683 &GMainWindow::OnGameListOpenPerGameProperties);
671 684
@@ -683,8 +696,6 @@ void GMainWindow::ConnectMenuEvents() {
683 connect(ui.action_Load_Folder, &QAction::triggered, this, &GMainWindow::OnMenuLoadFolder); 696 connect(ui.action_Load_Folder, &QAction::triggered, this, &GMainWindow::OnMenuLoadFolder);
684 connect(ui.action_Install_File_NAND, &QAction::triggered, this, 697 connect(ui.action_Install_File_NAND, &QAction::triggered, this,
685 &GMainWindow::OnMenuInstallToNAND); 698 &GMainWindow::OnMenuInstallToNAND);
686 connect(ui.action_Select_Game_List_Root, &QAction::triggered, this,
687 &GMainWindow::OnMenuSelectGameListRoot);
688 connect(ui.action_Select_NAND_Directory, &QAction::triggered, this, 699 connect(ui.action_Select_NAND_Directory, &QAction::triggered, this,
689 [this] { OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget::NAND); }); 700 [this] { OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget::NAND); });
690 connect(ui.action_Select_SDMC_Directory, &QAction::triggered, this, 701 connect(ui.action_Select_SDMC_Directory, &QAction::triggered, this,
@@ -747,9 +758,24 @@ void GMainWindow::OnDisplayTitleBars(bool show) {
747 } 758 }
748} 759}
749 760
761void GMainWindow::PreventOSSleep() {
762#ifdef _WIN32
763 SetThreadExecutionState(ES_CONTINUOUS | ES_SYSTEM_REQUIRED | ES_DISPLAY_REQUIRED);
764#endif
765}
766
767void GMainWindow::AllowOSSleep() {
768#ifdef _WIN32
769 SetThreadExecutionState(ES_CONTINUOUS);
770#endif
771}
772
750QStringList GMainWindow::GetUnsupportedGLExtensions() { 773QStringList GMainWindow::GetUnsupportedGLExtensions() {
751 QStringList unsupported_ext; 774 QStringList unsupported_ext;
752 775
776 if (!GLAD_GL_ARB_buffer_storage) {
777 unsupported_ext.append(QStringLiteral("ARB_buffer_storage"));
778 }
753 if (!GLAD_GL_ARB_direct_state_access) { 779 if (!GLAD_GL_ARB_direct_state_access) {
754 unsupported_ext.append(QStringLiteral("ARB_direct_state_access")); 780 unsupported_ext.append(QStringLiteral("ARB_direct_state_access"));
755 } 781 }
@@ -934,6 +960,7 @@ void GMainWindow::BootGame(const QString& filename) {
934 // Update the GUI 960 // Update the GUI
935 if (ui.action_Single_Window_Mode->isChecked()) { 961 if (ui.action_Single_Window_Mode->isChecked()) {
936 game_list->hide(); 962 game_list->hide();
963 game_list_placeholder->hide();
937 } 964 }
938 status_bar_update_timer.start(2000); 965 status_bar_update_timer.start(2000);
939 966
@@ -963,6 +990,8 @@ void GMainWindow::BootGame(const QString& filename) {
963} 990}
964 991
965void GMainWindow::ShutdownGame() { 992void GMainWindow::ShutdownGame() {
993 AllowOSSleep();
994
966 discord_rpc->Pause(); 995 discord_rpc->Pause();
967 emu_thread->RequestStop(); 996 emu_thread->RequestStop();
968 997
@@ -989,7 +1018,10 @@ void GMainWindow::ShutdownGame() {
989 render_window->hide(); 1018 render_window->hide();
990 loading_screen->hide(); 1019 loading_screen->hide();
991 loading_screen->Clear(); 1020 loading_screen->Clear();
992 game_list->show(); 1021 if (game_list->isEmpty())
1022 game_list_placeholder->show();
1023 else
1024 game_list->show();
993 game_list->setFilterFocus(); 1025 game_list->setFilterFocus();
994 1026
995 UpdateWindowTitle(); 1027 UpdateWindowTitle();
@@ -1280,6 +1312,47 @@ void GMainWindow::OnGameListNavigateToGamedbEntry(u64 program_id,
1280 QDesktopServices::openUrl(QUrl(QStringLiteral("https://yuzu-emu.org/game/") + directory)); 1312 QDesktopServices::openUrl(QUrl(QStringLiteral("https://yuzu-emu.org/game/") + directory));
1281} 1313}
1282 1314
1315void GMainWindow::OnGameListOpenDirectory(const QString& directory) {
1316 QString path;
1317 if (directory == QStringLiteral("SDMC")) {
1318 path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir) +
1319 "Nintendo/Contents/registered");
1320 } else if (directory == QStringLiteral("UserNAND")) {
1321 path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) +
1322 "user/Contents/registered");
1323 } else if (directory == QStringLiteral("SysNAND")) {
1324 path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) +
1325 "system/Contents/registered");
1326 } else {
1327 path = directory;
1328 }
1329 if (!QFileInfo::exists(path)) {
1330 QMessageBox::critical(this, tr("Error Opening %1").arg(path), tr("Folder does not exist!"));
1331 return;
1332 }
1333 QDesktopServices::openUrl(QUrl::fromLocalFile(path));
1334}
1335
1336void GMainWindow::OnGameListAddDirectory() {
1337 const QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory"));
1338 if (dir_path.isEmpty())
1339 return;
1340 UISettings::GameDir game_dir{dir_path, false, true};
1341 if (!UISettings::values.game_dirs.contains(game_dir)) {
1342 UISettings::values.game_dirs.append(game_dir);
1343 game_list->PopulateAsync(UISettings::values.game_dirs);
1344 } else {
1345 LOG_WARNING(Frontend, "Selected directory is already in the game list");
1346 }
1347}
1348
1349void GMainWindow::OnGameListShowList(bool show) {
1350 if (emulation_running && ui.action_Single_Window_Mode->isChecked())
1351 return;
1352 game_list->setVisible(show);
1353 game_list_placeholder->setVisible(!show);
1354};
1355
1283void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) { 1356void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) {
1284 u64 title_id{}; 1357 u64 title_id{};
1285 const auto v_file = Core::GetGameFileFromPath(vfs, file); 1358 const auto v_file = Core::GetGameFileFromPath(vfs, file);
@@ -1298,8 +1371,7 @@ void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) {
1298 1371
1299 const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); 1372 const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false);
1300 if (reload) { 1373 if (reload) {
1301 game_list->PopulateAsync(UISettings::values.game_directory_path, 1374 game_list->PopulateAsync(UISettings::values.game_dirs);
1302 UISettings::values.game_directory_deepscan);
1303 } 1375 }
1304 1376
1305 config->Save(); 1377 config->Save();
@@ -1389,8 +1461,7 @@ void GMainWindow::OnMenuInstallToNAND() {
1389 const auto success = [this]() { 1461 const auto success = [this]() {
1390 QMessageBox::information(this, tr("Successfully Installed"), 1462 QMessageBox::information(this, tr("Successfully Installed"),
1391 tr("The file was successfully installed.")); 1463 tr("The file was successfully installed."));
1392 game_list->PopulateAsync(UISettings::values.game_directory_path, 1464 game_list->PopulateAsync(UISettings::values.game_dirs);
1393 UISettings::values.game_directory_deepscan);
1394 FileUtil::DeleteDirRecursively(FileUtil::GetUserPath(FileUtil::UserPath::CacheDir) + 1465 FileUtil::DeleteDirRecursively(FileUtil::GetUserPath(FileUtil::UserPath::CacheDir) +
1395 DIR_SEP + "game_list"); 1466 DIR_SEP + "game_list");
1396 }; 1467 };
@@ -1515,14 +1586,6 @@ void GMainWindow::OnMenuInstallToNAND() {
1515 } 1586 }
1516} 1587}
1517 1588
1518void GMainWindow::OnMenuSelectGameListRoot() {
1519 QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory"));
1520 if (!dir_path.isEmpty()) {
1521 UISettings::values.game_directory_path = dir_path;
1522 game_list->PopulateAsync(dir_path, UISettings::values.game_directory_deepscan);
1523 }
1524}
1525
1526void GMainWindow::OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target) { 1589void GMainWindow::OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target) {
1527 const auto res = QMessageBox::information( 1590 const auto res = QMessageBox::information(
1528 this, tr("Changing Emulated Directory"), 1591 this, tr("Changing Emulated Directory"),
@@ -1541,8 +1604,7 @@ void GMainWindow::OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target)
1541 : FileUtil::UserPath::NANDDir, 1604 : FileUtil::UserPath::NANDDir,
1542 dir_path.toStdString()); 1605 dir_path.toStdString());
1543 Service::FileSystem::CreateFactories(*vfs); 1606 Service::FileSystem::CreateFactories(*vfs);
1544 game_list->PopulateAsync(UISettings::values.game_directory_path, 1607 game_list->PopulateAsync(UISettings::values.game_dirs);
1545 UISettings::values.game_directory_deepscan);
1546 } 1608 }
1547} 1609}
1548 1610
@@ -1564,6 +1626,8 @@ void GMainWindow::OnMenuRecentFile() {
1564} 1626}
1565 1627
1566void GMainWindow::OnStartGame() { 1628void GMainWindow::OnStartGame() {
1629 PreventOSSleep();
1630
1567 emu_thread->SetRunning(true); 1631 emu_thread->SetRunning(true);
1568 1632
1569 qRegisterMetaType<Core::Frontend::SoftwareKeyboardParameters>( 1633 qRegisterMetaType<Core::Frontend::SoftwareKeyboardParameters>(
@@ -1595,6 +1659,8 @@ void GMainWindow::OnPauseGame() {
1595 ui.action_Pause->setEnabled(false); 1659 ui.action_Pause->setEnabled(false);
1596 ui.action_Stop->setEnabled(true); 1660 ui.action_Stop->setEnabled(true);
1597 ui.action_Capture_Screenshot->setEnabled(false); 1661 ui.action_Capture_Screenshot->setEnabled(false);
1662
1663 AllowOSSleep();
1598} 1664}
1599 1665
1600void GMainWindow::OnStopGame() { 1666void GMainWindow::OnStopGame() {
@@ -1702,11 +1768,11 @@ void GMainWindow::OnConfigure() {
1702 if (UISettings::values.enable_discord_presence != old_discord_presence) { 1768 if (UISettings::values.enable_discord_presence != old_discord_presence) {
1703 SetDiscordEnabled(UISettings::values.enable_discord_presence); 1769 SetDiscordEnabled(UISettings::values.enable_discord_presence);
1704 } 1770 }
1771 emit UpdateThemedIcons();
1705 1772
1706 const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); 1773 const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false);
1707 if (reload) { 1774 if (reload) {
1708 game_list->PopulateAsync(UISettings::values.game_directory_path, 1775 game_list->PopulateAsync(UISettings::values.game_dirs);
1709 UISettings::values.game_directory_deepscan);
1710 } 1776 }
1711 1777
1712 config->Save(); 1778 config->Save();
@@ -1840,13 +1906,14 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det
1840 "data, or other bugs."); 1906 "data, or other bugs.");
1841 switch (result) { 1907 switch (result) {
1842 case Core::System::ResultStatus::ErrorSystemFiles: { 1908 case Core::System::ResultStatus::ErrorSystemFiles: {
1843 QString message = tr("yuzu was unable to locate a Switch system archive"); 1909 QString message;
1844 if (!details.empty()) { 1910 if (details.empty()) {
1845 message.append(tr(": %1. ").arg(QString::fromStdString(details))); 1911 message =
1912 tr("yuzu was unable to locate a Switch system archive. %1").arg(common_message);
1846 } else { 1913 } else {
1847 message.append(tr(". ")); 1914 message = tr("yuzu was unable to locate a Switch system archive: %1. %2")
1915 .arg(QString::fromStdString(details), common_message);
1848 } 1916 }
1849 message.append(common_message);
1850 1917
1851 answer = QMessageBox::question(this, tr("System Archive Not Found"), message, 1918 answer = QMessageBox::question(this, tr("System Archive Not Found"), message,
1852 QMessageBox::Yes | QMessageBox::No, QMessageBox::No); 1919 QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
@@ -1855,8 +1922,8 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det
1855 } 1922 }
1856 1923
1857 case Core::System::ResultStatus::ErrorSharedFont: { 1924 case Core::System::ResultStatus::ErrorSharedFont: {
1858 QString message = tr("yuzu was unable to locate the Switch shared fonts. "); 1925 const QString message =
1859 message.append(common_message); 1926 tr("yuzu was unable to locate the Switch shared fonts. %1").arg(common_message);
1860 answer = QMessageBox::question(this, tr("Shared Fonts Not Found"), message, 1927 answer = QMessageBox::question(this, tr("Shared Fonts Not Found"), message,
1861 QMessageBox::Yes | QMessageBox::No, QMessageBox::No); 1928 QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
1862 status_message = tr("Shared Font Missing"); 1929 status_message = tr("Shared Font Missing");
@@ -1969,8 +2036,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
1969 Service::FileSystem::CreateFactories(*vfs); 2036 Service::FileSystem::CreateFactories(*vfs);
1970 2037
1971 if (behavior == ReinitializeKeyBehavior::Warning) { 2038 if (behavior == ReinitializeKeyBehavior::Warning) {
1972 game_list->PopulateAsync(UISettings::values.game_directory_path, 2039 game_list->PopulateAsync(UISettings::values.game_dirs);
1973 UISettings::values.game_directory_deepscan);
1974 } 2040 }
1975} 2041}
1976 2042
@@ -2135,7 +2201,6 @@ void GMainWindow::UpdateUITheme() {
2135 } 2201 }
2136 2202
2137 QIcon::setThemeSearchPaths(theme_paths); 2203 QIcon::setThemeSearchPaths(theme_paths);
2138 emit UpdateThemedIcons();
2139} 2204}
2140 2205
2141void GMainWindow::SetDiscordEnabled([[maybe_unused]] bool state) { 2206void GMainWindow::SetDiscordEnabled([[maybe_unused]] bool state) {
@@ -2164,6 +2229,14 @@ int main(int argc, char* argv[]) {
2164 QCoreApplication::setOrganizationName(QStringLiteral("yuzu team")); 2229 QCoreApplication::setOrganizationName(QStringLiteral("yuzu team"));
2165 QCoreApplication::setApplicationName(QStringLiteral("yuzu")); 2230 QCoreApplication::setApplicationName(QStringLiteral("yuzu"));
2166 2231
2232#ifdef __APPLE__
2233 // If you start a bundle (binary) on OSX without the Terminal, the working directory is "/".
2234 // But since we require the working directory to be the executable path for the location of the
2235 // user folder in the Qt Frontend, we need to cd into that working directory
2236 const std::string bin_path = FileUtil::GetBundleDirectory() + DIR_SEP + "..";
2237 chdir(bin_path.c_str());
2238#endif
2239
2167 // Enables the core to make the qt created contexts current on std::threads 2240 // Enables the core to make the qt created contexts current on std::threads
2168 QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity); 2241 QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity);
2169 QApplication app(argc, argv); 2242 QApplication app(argc, argv);
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 1137bbc7a..7d16188cb 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -30,6 +30,7 @@ class ProfilerWidget;
30class QLabel; 30class QLabel;
31class WaitTreeWidget; 31class WaitTreeWidget;
32enum class GameListOpenTarget; 32enum class GameListOpenTarget;
33class GameListPlaceholder;
33 34
34namespace Core::Frontend { 35namespace Core::Frontend {
35struct SoftwareKeyboardParameters; 36struct SoftwareKeyboardParameters;
@@ -130,6 +131,9 @@ private:
130 void ConnectWidgetEvents(); 131 void ConnectWidgetEvents();
131 void ConnectMenuEvents(); 132 void ConnectMenuEvents();
132 133
134 void PreventOSSleep();
135 void AllowOSSleep();
136
133 QStringList GetUnsupportedGLExtensions(); 137 QStringList GetUnsupportedGLExtensions();
134 bool LoadROM(const QString& filename); 138 bool LoadROM(const QString& filename);
135 void BootGame(const QString& filename); 139 void BootGame(const QString& filename);
@@ -183,12 +187,13 @@ private slots:
183 void OnGameListCopyTID(u64 program_id); 187 void OnGameListCopyTID(u64 program_id);
184 void OnGameListNavigateToGamedbEntry(u64 program_id, 188 void OnGameListNavigateToGamedbEntry(u64 program_id,
185 const CompatibilityList& compatibility_list); 189 const CompatibilityList& compatibility_list);
190 void OnGameListOpenDirectory(const QString& directory);
191 void OnGameListAddDirectory();
192 void OnGameListShowList(bool show);
186 void OnGameListOpenPerGameProperties(const std::string& file); 193 void OnGameListOpenPerGameProperties(const std::string& file);
187 void OnMenuLoadFile(); 194 void OnMenuLoadFile();
188 void OnMenuLoadFolder(); 195 void OnMenuLoadFolder();
189 void OnMenuInstallToNAND(); 196 void OnMenuInstallToNAND();
190 /// Called whenever a user selects the "File->Select Game List Root" menu item
191 void OnMenuSelectGameListRoot();
192 /// Called whenever a user select the "File->Select -- Directory" where -- is NAND or SD Card 197 /// Called whenever a user select the "File->Select -- Directory" where -- is NAND or SD Card
193 void OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target); 198 void OnMenuSelectEmulatedDirectory(EmulatedDirectoryTarget target);
194 void OnMenuRecentFile(); 199 void OnMenuRecentFile();
@@ -220,6 +225,8 @@ private:
220 GameList* game_list; 225 GameList* game_list;
221 LoadingScreen* loading_screen; 226 LoadingScreen* loading_screen;
222 227
228 GameListPlaceholder* game_list_placeholder;
229
223 // Status bar elements 230 // Status bar elements
224 QLabel* message_label = nullptr; 231 QLabel* message_label = nullptr;
225 QLabel* emu_speed_label = nullptr; 232 QLabel* emu_speed_label = nullptr;
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index ffcabb495..a1ce3c0c3 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -62,7 +62,6 @@
62 <addaction name="action_Load_File"/> 62 <addaction name="action_Load_File"/>
63 <addaction name="action_Load_Folder"/> 63 <addaction name="action_Load_Folder"/>
64 <addaction name="separator"/> 64 <addaction name="separator"/>
65 <addaction name="action_Select_Game_List_Root"/>
66 <addaction name="menu_recent_files"/> 65 <addaction name="menu_recent_files"/>
67 <addaction name="separator"/> 66 <addaction name="separator"/>
68 <addaction name="action_Select_NAND_Directory"/> 67 <addaction name="action_Select_NAND_Directory"/>
diff --git a/src/yuzu/ui_settings.cpp b/src/yuzu/uisettings.cpp
index 4bdc302e0..7f7d247a3 100644
--- a/src/yuzu/ui_settings.cpp
+++ b/src/yuzu/uisettings.cpp
@@ -2,7 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "ui_settings.h" 5#include "yuzu/uisettings.h"
6 6
7namespace UISettings { 7namespace UISettings {
8 8
diff --git a/src/yuzu/ui_settings.h b/src/yuzu/uisettings.h
index a62cd6911..c57290006 100644
--- a/src/yuzu/ui_settings.h
+++ b/src/yuzu/uisettings.h
@@ -8,8 +8,10 @@
8#include <atomic> 8#include <atomic>
9#include <vector> 9#include <vector>
10#include <QByteArray> 10#include <QByteArray>
11#include <QMetaType>
11#include <QString> 12#include <QString>
12#include <QStringList> 13#include <QStringList>
14#include <QVector>
13#include "common/common_types.h" 15#include "common/common_types.h"
14 16
15namespace UISettings { 17namespace UISettings {
@@ -25,6 +27,18 @@ struct Shortcut {
25using Themes = std::array<std::pair<const char*, const char*>, 2>; 27using Themes = std::array<std::pair<const char*, const char*>, 2>;
26extern const Themes themes; 28extern const Themes themes;
27 29
30struct GameDir {
31 QString path;
32 bool deep_scan;
33 bool expanded;
34 bool operator==(const GameDir& rhs) const {
35 return path == rhs.path;
36 };
37 bool operator!=(const GameDir& rhs) const {
38 return !operator==(rhs);
39 };
40};
41
28struct Values { 42struct Values {
29 QByteArray geometry; 43 QByteArray geometry;
30 QByteArray state; 44 QByteArray state;
@@ -55,8 +69,9 @@ struct Values {
55 QString roms_path; 69 QString roms_path;
56 QString symbols_path; 70 QString symbols_path;
57 QString screenshot_path; 71 QString screenshot_path;
58 QString game_directory_path; 72 QString game_dir_deprecated;
59 bool game_directory_deepscan; 73 bool game_dir_deprecated_deepscan;
74 QVector<UISettings::GameDir> game_dirs;
60 QStringList recent_files; 75 QStringList recent_files;
61 76
62 QString theme; 77 QString theme;
@@ -84,3 +99,5 @@ struct Values {
84 99
85extern Values values; 100extern Values values;
86} // namespace UISettings 101} // namespace UISettings
102
103Q_DECLARE_METATYPE(UISettings::GameDir*);
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 9ac92e937..067d58d80 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -340,7 +340,6 @@ void Config::ReadValues() {
340 } 340 }
341 341
342 // Core 342 // Core
343 Settings::values.use_cpu_jit = sdl2_config->GetBoolean("Core", "use_cpu_jit", true);
344 Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); 343 Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
345 344
346 // Renderer 345 // Renderer
@@ -383,6 +382,7 @@ void Config::ReadValues() {
383 Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false); 382 Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false);
384 Settings::values.reporting_services = 383 Settings::values.reporting_services =
385 sdl2_config->GetBoolean("Debugging", "reporting_services", false); 384 sdl2_config->GetBoolean("Debugging", "reporting_services", false);
385 Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false);
386 386
387 const auto title_list = sdl2_config->Get("AddOns", "title_ids", ""); 387 const auto title_list = sdl2_config->Get("AddOns", "title_ids", "");
388 std::stringstream ss(title_list); 388 std::stringstream ss(title_list);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 6538af098..0cfc111a6 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -76,10 +76,6 @@ motion_device=
76touch_device= 76touch_device=
77 77
78[Core] 78[Core]
79# Whether to use the Just-In-Time (JIT) compiler for CPU emulation
80# 0: Interpreter (slow), 1 (default): JIT (fast)
81use_cpu_jit =
82
83# Whether to use multi-core for CPU emulation 79# Whether to use multi-core for CPU emulation
84# 0 (default): Disabled, 1: Enabled 80# 0 (default): Disabled, 1: Enabled
85use_multi_core= 81use_multi_core=
@@ -224,6 +220,9 @@ gdbstub_port=24689
224dump_exefs=false 220dump_exefs=false
225# Determines whether or not yuzu will dump all NSOs it attempts to load while loading them 221# Determines whether or not yuzu will dump all NSOs it attempts to load while loading them
226dump_nso=false 222dump_nso=false
223# Determines whether or not yuzu will report to the game that the emulated console is in Kiosk Mode
224# false: Retail/Normal Mode (default), true: Kiosk Mode
225quest_flag =
227 226
228[WebService] 227[WebService]
229# Whether or not to enable telemetry 228# Whether or not to enable telemetry
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index e2d3df180..f91b071bf 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -52,6 +52,10 @@ private:
52bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() { 52bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() {
53 std::vector<std::string> unsupported_ext; 53 std::vector<std::string> unsupported_ext;
54 54
55 if (!GLAD_GL_ARB_buffer_storage)
56 unsupported_ext.push_back("ARB_buffer_storage");
57 if (!GLAD_GL_ARB_direct_state_access)
58 unsupported_ext.push_back("ARB_direct_state_access");
55 if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) 59 if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev)
56 unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev"); 60 unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev");
57 if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) 61 if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge)
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index d7e0d408d..9a11dc6c3 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -114,7 +114,6 @@ void Config::ReadValues() {
114 } 114 }
115 115
116 // Core 116 // Core
117 Settings::values.use_cpu_jit = sdl2_config->GetBoolean("Core", "use_cpu_jit", true);
118 Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); 117 Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
119 118
120 // Renderer 119 // Renderer
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index 46a9960cd..9a3e86d68 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -8,10 +8,6 @@ namespace DefaultINI {
8 8
9const char* sdl2_config_file = R"( 9const char* sdl2_config_file = R"(
10[Core] 10[Core]
11# Whether to use the Just-In-Time (JIT) compiler for CPU emulation
12# 0: Interpreter (slow), 1 (default): JIT (fast)
13use_cpu_jit =
14
15# Whether to use multi-core for CPU emulation 11# Whether to use multi-core for CPU emulation
16# 0 (default): Disabled, 1: Enabled 12# 0 (default): Disabled, 1: Enabled
17use_multi_core= 13use_multi_core=
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp
index b589c3de3..0ee97aa54 100644
--- a/src/yuzu_tester/yuzu.cpp
+++ b/src/yuzu_tester/yuzu.cpp
@@ -92,7 +92,6 @@ int main(int argc, char** argv) {
92 92
93 int option_index = 0; 93 int option_index = 0;
94 94
95 char* endarg;
96#ifdef _WIN32 95#ifdef _WIN32
97 int argc_w; 96 int argc_w;
98 auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); 97 auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w);
@@ -226,7 +225,7 @@ int main(int argc, char** argv) {
226 225
227 switch (load_result) { 226 switch (load_result) {
228 case Core::System::ResultStatus::ErrorGetLoader: 227 case Core::System::ResultStatus::ErrorGetLoader:
229 LOG_CRITICAL(Frontend, "Failed to obtain loader for %s!", filepath.c_str()); 228 LOG_CRITICAL(Frontend, "Failed to obtain loader for {}!", filepath);
230 return -1; 229 return -1;
231 case Core::System::ResultStatus::ErrorLoader: 230 case Core::System::ResultStatus::ErrorLoader:
232 LOG_CRITICAL(Frontend, "Failed to load ROM!"); 231 LOG_CRITICAL(Frontend, "Failed to load ROM!");