Quellcode durchsuchen

EXPERIMENTAL: Metal backend (#441)

This is not a continuation of the Metal backend; this is simply bringing
the branch up to date and merging it as-is behind an experiment.

---------

Co-authored-by: Isaac Marovitz <isaacryu@icloud.com>
Co-authored-by: Samuliak <samuliak77@gmail.com>
Co-authored-by: SamoZ256 <96914946+SamoZ256@users.noreply.github.com>
Co-authored-by: Isaac Marovitz <42140194+IsaacMarovitz@users.noreply.github.com>
Co-authored-by: riperiperi <rhy3756547@hotmail.com>
Co-authored-by: Gabriel A <gab.dark.100@gmail.com>
Evan Husted vor 1 Jahr
Ursprung
Commit
852823104f
100 geänderte Dateien mit 14153 neuen und 79 gelöschten Zeilen
  1. 1 0
      Directory.Packages.props
  2. 8 0
      Ryujinx.sln
  3. 2 0
      src/Ryujinx.Common/Configuration/GraphicsBackend.cs
  4. 18 0
      src/Ryujinx.Graphics.GAL/ComputeSize.cs
  5. 78 0
      src/Ryujinx.Graphics.GAL/Format.cs
  6. 8 9
      src/Ryujinx.Graphics.GAL/ShaderInfo.cs
  7. 1 18
      src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs
  8. 0 18
      src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs
  9. 11 2
      src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
  10. 6 1
      src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs
  11. 4 4
      src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
  12. 4 4
      src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
  13. 11 0
      src/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs
  14. 14 7
      src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
  15. 35 16
      src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs
  16. 146 0
      src/Ryujinx.Graphics.Metal/Auto.cs
  17. 107 0
      src/Ryujinx.Graphics.Metal/BackgroundResources.cs
  18. 157 0
      src/Ryujinx.Graphics.Metal/BitMap.cs
  19. 385 0
      src/Ryujinx.Graphics.Metal/BufferHolder.cs
  20. 237 0
      src/Ryujinx.Graphics.Metal/BufferManager.cs
  21. 85 0
      src/Ryujinx.Graphics.Metal/BufferUsageBitmap.cs
  22. 294 0
      src/Ryujinx.Graphics.Metal/CacheByRange.cs
  23. 170 0
      src/Ryujinx.Graphics.Metal/CommandBufferEncoder.cs
  24. 289 0
      src/Ryujinx.Graphics.Metal/CommandBufferPool.cs
  25. 43 0
      src/Ryujinx.Graphics.Metal/CommandBufferScoped.cs
  26. 41 0
      src/Ryujinx.Graphics.Metal/Constants.cs
  27. 22 0
      src/Ryujinx.Graphics.Metal/CounterEvent.cs
  28. 68 0
      src/Ryujinx.Graphics.Metal/DepthStencilCache.cs
  29. 26 0
      src/Ryujinx.Graphics.Metal/DisposableBuffer.cs
  30. 22 0
      src/Ryujinx.Graphics.Metal/DisposableSampler.cs
  31. 10 0
      src/Ryujinx.Graphics.Metal/Effects/IPostProcessingEffect.cs
  32. 18 0
      src/Ryujinx.Graphics.Metal/Effects/IScalingFilter.cs
  33. 63 0
      src/Ryujinx.Graphics.Metal/EncoderResources.cs
  34. 206 0
      src/Ryujinx.Graphics.Metal/EncoderState.cs
  35. 1788 0
      src/Ryujinx.Graphics.Metal/EncoderStateManager.cs
  36. 293 0
      src/Ryujinx.Graphics.Metal/EnumConversion.cs
  37. 77 0
      src/Ryujinx.Graphics.Metal/FenceHolder.cs
  38. 49 0
      src/Ryujinx.Graphics.Metal/FormatConverter.cs
  39. 196 0
      src/Ryujinx.Graphics.Metal/FormatTable.cs
  40. 82 0
      src/Ryujinx.Graphics.Metal/HardwareInfo.cs
  41. 143 0
      src/Ryujinx.Graphics.Metal/HashTableSlim.cs
  42. 868 0
      src/Ryujinx.Graphics.Metal/HelperShader.cs
  43. 121 0
      src/Ryujinx.Graphics.Metal/IdList.cs
  44. 74 0
      src/Ryujinx.Graphics.Metal/ImageArray.cs
  45. 118 0
      src/Ryujinx.Graphics.Metal/IndexBufferPattern.cs
  46. 103 0
      src/Ryujinx.Graphics.Metal/IndexBufferState.cs
  47. 309 0
      src/Ryujinx.Graphics.Metal/MetalRenderer.cs
  48. 262 0
      src/Ryujinx.Graphics.Metal/MultiFenceHolder.cs
  49. 99 0
      src/Ryujinx.Graphics.Metal/PersistentFlushBuffer.cs
  50. 877 0
      src/Ryujinx.Graphics.Metal/Pipeline.cs
  51. 286 0
      src/Ryujinx.Graphics.Metal/Program.cs
  52. 22 0
      src/Ryujinx.Graphics.Metal/ResourceBindingSegment.cs
  53. 59 0
      src/Ryujinx.Graphics.Metal/ResourceLayoutBuilder.cs
  54. 30 0
      src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj
  55. 90 0
      src/Ryujinx.Graphics.Metal/SamplerHolder.cs
  56. 43 0
      src/Ryujinx.Graphics.Metal/Shaders/Blit.metal
  57. 45 0
      src/Ryujinx.Graphics.Metal/Shaders/BlitMs.metal
  58. 72 0
      src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal
  59. 38 0
      src/Ryujinx.Graphics.Metal/Shaders/ColorClear.metal
  60. 66 0
      src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal
  61. 59 0
      src/Ryujinx.Graphics.Metal/Shaders/ConvertIndexBuffer.metal
  62. 27 0
      src/Ryujinx.Graphics.Metal/Shaders/DepthBlit.metal
  63. 29 0
      src/Ryujinx.Graphics.Metal/Shaders/DepthBlitMs.metal
  64. 42 0
      src/Ryujinx.Graphics.Metal/Shaders/DepthStencilClear.metal
  65. 27 0
      src/Ryujinx.Graphics.Metal/Shaders/StencilBlit.metal
  66. 29 0
      src/Ryujinx.Graphics.Metal/Shaders/StencilBlitMs.metal
  67. 288 0
      src/Ryujinx.Graphics.Metal/StagingBuffer.cs
  68. 110 0
      src/Ryujinx.Graphics.Metal/State/DepthStencilUid.cs
  69. 341 0
      src/Ryujinx.Graphics.Metal/State/PipelineState.cs
  70. 208 0
      src/Ryujinx.Graphics.Metal/State/PipelineUid.cs
  71. 42 0
      src/Ryujinx.Graphics.Metal/StateCache.cs
  72. 30 0
      src/Ryujinx.Graphics.Metal/StringHelper.cs
  73. 214 0
      src/Ryujinx.Graphics.Metal/SyncManager.cs
  74. 654 0
      src/Ryujinx.Graphics.Metal/Texture.cs
  75. 93 0
      src/Ryujinx.Graphics.Metal/TextureArray.cs
  76. 67 0
      src/Ryujinx.Graphics.Metal/TextureBase.cs
  77. 132 0
      src/Ryujinx.Graphics.Metal/TextureBuffer.cs
  78. 265 0
      src/Ryujinx.Graphics.Metal/TextureCopy.cs
  79. 60 0
      src/Ryujinx.Graphics.Metal/VertexBufferState.cs
  80. 231 0
      src/Ryujinx.Graphics.Metal/Window.cs
  81. 108 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/CodeGenContext.cs
  82. 578 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Declarations.cs
  83. 34 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Defaults.cs
  84. 5 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindLSB.metal
  85. 5 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBS32.metal
  86. 6 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBU32.metal
  87. 10 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/HelperFunctionNames.cs
  88. 14 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/Precise.metal
  89. 7 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/SwizzleAdd.metal
  90. 185 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGen.cs
  91. 30 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenBallot.cs
  92. 15 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenBarrier.cs
  93. 60 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenCall.cs
  94. 222 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenHelper.cs
  95. 672 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenMemory.cs
  96. 32 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenVector.cs
  97. 18 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstInfo.cs
  98. 35 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstType.cs
  99. 83 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/IoMap.cs
  100. 286 0
      src/Ryujinx.Graphics.Shader/CodeGen/Msl/MslGenerator.cs

+ 1 - 0
Directory.Packages.props

@@ -44,6 +44,7 @@
     <PackageVersion Include="Gommon" Version="2.6.8" />
     <PackageVersion Include="securifybv.ShellLink" Version="0.1.0" />
     <PackageVersion Include="shaderc.net" Version="0.1.0" />
+    <PackageVersion Include="SharpMetal" Version="1.0.0-preview20" />
     <PackageVersion Include="SharpZipLib" Version="1.4.2" />
     <PackageVersion Include="Silk.NET.Vulkan" Version="2.21.0" />
     <PackageVersion Include="Silk.NET.Vulkan.Extensions.EXT" Version="2.21.0" />

+ 8 - 0
Ryujinx.sln

@@ -80,6 +80,10 @@ EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Horizon.Kernel.Generators", "src\Ryujinx.Horizon.Kernel.Generators\Ryujinx.Horizon.Kernel.Generators.csproj", "{7F55A45D-4E1D-4A36-ADD3-87F29A285AA2}"
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.HLE.Generators", "src\Ryujinx.HLE.Generators\Ryujinx.HLE.Generators.csproj", "{B575BCDE-2FD8-4A5D-8756-31CDD7FE81F0}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ryujinx.Graphics.Metal", "src\Ryujinx.Graphics.Metal\Ryujinx.Graphics.Metal.csproj", "{C08931FA-1191-417A-864F-3882D93E683B}"
+	ProjectSection(ProjectDependencies) = postProject
+		{A602AE97-91A5-4608-8DF1-EBF4ED7A0B9E} = {A602AE97-91A5-4608-8DF1-EBF4ED7A0B9E}
+	EndProjectSection
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{36F870C1-3E5F-485F-B426-F0645AF78751}"
 	ProjectSection(SolutionItems) = preProject
@@ -257,6 +261,10 @@ Global
 		{4A89A234-4F19-497D-A576-DDE8CDFC5B22}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{4A89A234-4F19-497D-A576-DDE8CDFC5B22}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{4A89A234-4F19-497D-A576-DDE8CDFC5B22}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{C08931FA-1191-417A-864F-3882D93E683B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{C08931FA-1191-417A-864F-3882D93E683B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{C08931FA-1191-417A-864F-3882D93E683B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{C08931FA-1191-417A-864F-3882D93E683B}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

+ 2 - 0
src/Ryujinx.Common/Configuration/GraphicsBackend.cs

@@ -6,7 +6,9 @@ namespace Ryujinx.Common.Configuration
     [JsonConverter(typeof(TypedStringEnumConverter<GraphicsBackend>))]
     public enum GraphicsBackend
     {
+        Auto,
         Vulkan,
         OpenGl,
+        Metal
     }
 }

+ 18 - 0
src/Ryujinx.Graphics.GAL/ComputeSize.cs

@@ -0,0 +1,18 @@
+namespace Ryujinx.Graphics.GAL
+{
+    /// <summary>
+    /// Immutable three-component size, used to carry a compute shader local size.
+    /// </summary>
+    public readonly struct ComputeSize
+    {
+        /// <summary>
+        /// Fixed 32x32x1 size. Presumably the local size used by the vertex/tessellation/geometry
+        /// as compute path (going by the name) -- confirm against the callers.
+        /// </summary>
+        public static readonly ComputeSize VtgAsCompute = new(32, 32, 1);
+
+        /// <summary>
+        /// Size on the X dimension.
+        /// </summary>
+        public readonly int X;
+
+        /// <summary>
+        /// Size on the Y dimension.
+        /// </summary>
+        public readonly int Y;
+
+        /// <summary>
+        /// Size on the Z dimension.
+        /// </summary>
+        public readonly int Z;
+
+        /// <summary>
+        /// Creates a new compute size.
+        /// </summary>
+        /// <param name="x">Size on the X dimension</param>
+        /// <param name="y">Size on the Y dimension</param>
+        /// <param name="z">Size on the Z dimension</param>
+        public ComputeSize(int x, int y, int z)
+        {
+            X = x;
+            Y = y;
+            Z = z;
+        }
+    }
+}

+ 78 - 0
src/Ryujinx.Graphics.GAL/Format.cs

@@ -339,6 +339,84 @@ namespace Ryujinx.Graphics.GAL
             return 1;
         }
 
+        /// <summary>
+        /// Computes the size in bytes of a single element of the given format.
+        /// </summary>
+        /// <param name="format">Texture format</param>
+        /// <returns>Scalar size of the format multiplied by its component count (pixel, vertex attribute, etc)</returns>
+        public static int GetBytesPerElement(this Format format)
+        {
+            int scalarSize = format.GetScalarSize();
+
+            // Only the multi-component formats are listed here; every other format
+            // is sized as exactly one scalar.
+            int componentsCount = format switch
+            {
+                Format.R8G8Unorm or
+                Format.R8G8Snorm or
+                Format.R8G8Uint or
+                Format.R8G8Sint or
+                Format.R8G8Uscaled or
+                Format.R8G8Sscaled or
+                Format.R16G16Float or
+                Format.R16G16Unorm or
+                Format.R16G16Snorm or
+                Format.R16G16Uint or
+                Format.R16G16Sint or
+                Format.R16G16Uscaled or
+                Format.R16G16Sscaled or
+                Format.R32G32Float or
+                Format.R32G32Uint or
+                Format.R32G32Sint or
+                Format.R32G32Uscaled or
+                Format.R32G32Sscaled => 2,
+
+                Format.R8G8B8Unorm or
+                Format.R8G8B8Snorm or
+                Format.R8G8B8Uint or
+                Format.R8G8B8Sint or
+                Format.R8G8B8Uscaled or
+                Format.R8G8B8Sscaled or
+                Format.R16G16B16Float or
+                Format.R16G16B16Unorm or
+                Format.R16G16B16Snorm or
+                Format.R16G16B16Uint or
+                Format.R16G16B16Sint or
+                Format.R16G16B16Uscaled or
+                Format.R16G16B16Sscaled or
+                Format.R32G32B32Float or
+                Format.R32G32B32Uint or
+                Format.R32G32B32Sint or
+                Format.R32G32B32Uscaled or
+                Format.R32G32B32Sscaled => 3,
+
+                Format.R8G8B8A8Unorm or
+                Format.R8G8B8A8Snorm or
+                Format.R8G8B8A8Uint or
+                Format.R8G8B8A8Sint or
+                Format.R8G8B8A8Srgb or
+                Format.R8G8B8A8Uscaled or
+                Format.R8G8B8A8Sscaled or
+                Format.B8G8R8A8Unorm or
+                Format.B8G8R8A8Srgb or
+                Format.R16G16B16A16Float or
+                Format.R16G16B16A16Unorm or
+                Format.R16G16B16A16Snorm or
+                Format.R16G16B16A16Uint or
+                Format.R16G16B16A16Sint or
+                Format.R16G16B16A16Uscaled or
+                Format.R16G16B16A16Sscaled or
+                Format.R32G32B32A32Float or
+                Format.R32G32B32A32Uint or
+                Format.R32G32B32A32Sint or
+                Format.R32G32B32A32Uscaled or
+                Format.R32G32B32A32Sscaled => 4,
+
+                _ => 1,
+            };
+
+            return componentsCount * scalarSize;
+        }
+
         /// <summary>
         /// Checks if the texture format is a depth or depth-stencil format.
         /// </summary>

+ 8 - 9
src/Ryujinx.Graphics.GAL/ShaderInfo.cs

@@ -4,23 +4,22 @@ namespace Ryujinx.Graphics.GAL
     {
         public int FragmentOutputMap { get; }
         public ResourceLayout ResourceLayout { get; }
+        public ComputeSize ComputeLocalSize { get; }
         public ProgramPipelineState? State { get; }
         public bool FromCache { get; set; }
 
-        public ShaderInfo(int fragmentOutputMap, ResourceLayout resourceLayout, ProgramPipelineState state, bool fromCache = false)
+        public ShaderInfo(
+            int fragmentOutputMap,
+            ResourceLayout resourceLayout,
+            ComputeSize computeLocalSize,
+            ProgramPipelineState? state,
+            bool fromCache = false)
         {
             FragmentOutputMap = fragmentOutputMap;
             ResourceLayout = resourceLayout;
+            ComputeLocalSize = computeLocalSize;
             State = state;
             FromCache = fromCache;
         }
-
-        public ShaderInfo(int fragmentOutputMap, ResourceLayout resourceLayout, bool fromCache = false)
-        {
-            FragmentOutputMap = fragmentOutputMap;
-            ResourceLayout = resourceLayout;
-            State = null;
-            FromCache = fromCache;
-        }
     }
 }

+ 1 - 18
src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs

@@ -11,8 +11,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw
     /// </summary>
     class VtgAsComputeContext : IDisposable
     {
-        private const int DummyBufferSize = 16;
-
         private readonly GpuContext _context;
 
         /// <summary>
@@ -48,7 +46,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw
                         1,
                         1,
                         1,
-                        1,
+                        format.GetBytesPerElement(),
                         format,
                         DepthStencilMode.Depth,
                         Target.TextureBuffer,
@@ -521,21 +519,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw
             return new BufferRange(_geometryIndexDataBuffer.Handle, offset, size, write);
         }
 
-        /// <summary>
-        /// Gets the range for a dummy 16 bytes buffer, filled with zeros.
-        /// </summary>
-        /// <returns>Dummy buffer range</returns>
-        public BufferRange GetDummyBufferRange()
-        {
-            if (_dummyBuffer == BufferHandle.Null)
-            {
-                _dummyBuffer = _context.Renderer.CreateBuffer(DummyBufferSize, BufferAccess.DeviceMemory);
-                _context.Renderer.Pipeline.ClearBuffer(_dummyBuffer, 0, DummyBufferSize, 0);
-            }
-
-            return new BufferRange(_dummyBuffer, 0, DummyBufferSize);
-        }
-
         /// <summary>
         /// Gets the range for a sequential index buffer, with ever incrementing index values.
         /// </summary>

+ 0 - 18
src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs

@@ -147,7 +147,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw
                 {
                     _vacContext.VertexInfoBufferUpdater.SetVertexStride(index, 0, componentsCount);
                     _vacContext.VertexInfoBufferUpdater.SetVertexOffset(index, 0, 0);
-                    SetDummyBufferTexture(_vertexAsCompute.Reservations, index, format);
                     continue;
                 }
 
@@ -163,15 +162,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw
                 {
                     _vacContext.VertexInfoBufferUpdater.SetVertexStride(index, 0, componentsCount);
                     _vacContext.VertexInfoBufferUpdater.SetVertexOffset(index, 0, 0);
-                    SetDummyBufferTexture(_vertexAsCompute.Reservations, index, format);
                     continue;
                 }
 
                 int vbStride = vertexBuffer.UnpackStride();
                 ulong vbSize = GetVertexBufferSize(address, endAddress.Pack(), vbStride, _indexed, instanced, _firstVertex, _count);
 
-                ulong oldVbSize = vbSize;
-
                 ulong attributeOffset = (ulong)vertexAttrib.UnpackOffset();
                 int componentSize = format.GetScalarSize();
 
@@ -345,20 +341,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw
             return maxOutputVertices / verticesPerPrimitive;
         }
 
-        /// <summary>
-        /// Binds a dummy buffer as vertex buffer into a buffer texture.
-        /// </summary>
-        /// <param name="reservations">Shader resource binding reservations</param>
-        /// <param name="index">Buffer texture index</param>
-        /// <param name="format">Buffer texture format</param>
-        private readonly void SetDummyBufferTexture(ResourceReservations reservations, int index, Format format)
-        {
-            ITexture bufferTexture = _vacContext.EnsureBufferTexture(index + 2, format);
-            bufferTexture.SetStorage(_vacContext.GetDummyBufferRange());
-
-            _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, reservations.GetVertexBufferTextureBinding(index), bufferTexture, null);
-        }
-
         /// <summary>
         /// Binds a vertex buffer into a buffer texture.
         /// </summary>

+ 11 - 2
src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs

@@ -324,6 +324,11 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
 
                 bool loadHostCache = header.CodeGenVersion == CodeGenVersion;
 
+                if (context.Capabilities.Api == TargetApi.Metal)
+                {
+                    loadHostCache = false;
+                }
+
                 int programIndex = 0;
 
                 DataEntry entry = new();
@@ -392,7 +397,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                                 context,
                                 shaders,
                                 specState.PipelineState,
-                                specState.TransformFeedbackDescriptors != null);
+                                specState.TransformFeedbackDescriptors != null,
+                                specState.ComputeState.GetLocalSize());
 
                             IProgram hostProgram;
 
@@ -629,7 +635,10 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
                 return;
             }
 
-            WriteHostCode(context, hostCode, program.Shaders, streams, timestamp);
+            if (context.Capabilities.Api != TargetApi.Metal)
+            {
+                WriteHostCode(context, hostCode, program.Shaders, streams, timestamp);
+            }
         }
 
         /// <summary>

+ 6 - 1
src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs

@@ -490,7 +490,12 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             {
                 ShaderSource[] shaderSources = new ShaderSource[compilation.TranslatedStages.Length];
 
-                ShaderInfoBuilder shaderInfoBuilder = new(_context, compilation.SpecializationState.TransformFeedbackDescriptors != null);
+                ref GpuChannelComputeState computeState = ref compilation.SpecializationState.ComputeState;
+
+                ShaderInfoBuilder shaderInfoBuilder = new(
+                    _context,
+                    compilation.SpecializationState.TransformFeedbackDescriptors != null,
+                    computeLocalSize: computeState.GetLocalSize());
 
                 for (int index = 0; index < compilation.TranslatedStages.Length; index++)
                 {

+ 4 - 4
src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs

@@ -16,7 +16,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         private readonly GpuAccessorState _state;
         private readonly int _stageIndex;
         private readonly bool _compute;
-        private readonly bool _isVulkan;
+        private readonly bool _isOpenGL;
         private readonly bool _hasGeometryShader;
         private readonly bool _supportsQuads;
 
@@ -38,7 +38,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
             _channel = channel;
             _state = state;
             _stageIndex = stageIndex;
-            _isVulkan = context.Capabilities.Api == TargetApi.Vulkan;
+            _isOpenGL = context.Capabilities.Api == TargetApi.OpenGL;
             _hasGeometryShader = hasGeometryShader;
             _supportsQuads = context.Capabilities.SupportsQuads;
 
@@ -116,10 +116,10 @@ namespace Ryujinx.Graphics.Gpu.Shader
         public GpuGraphicsState QueryGraphicsState()
         {
             return _state.GraphicsState.CreateShaderGraphicsState(
-                !_isVulkan,
+                _isOpenGL,
                 _supportsQuads,
                 _hasGeometryShader,
-                _isVulkan || _state.GraphicsState.YNegateEnabled);
+                !_isOpenGL || _state.GraphicsState.YNegateEnabled);
         }
 
         /// <inheritdoc/>

+ 4 - 4
src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs

@@ -55,7 +55,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         {
             int binding;
 
-            if (_context.Capabilities.Api == TargetApi.Vulkan)
+            if (_context.Capabilities.Api != TargetApi.OpenGL)
             {
                 binding = GetBindingFromIndex(index, _context.Capabilities.MaximumUniformBuffersPerStage, "Uniform buffer");
             }
@@ -71,7 +71,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         {
             int binding;
 
-            if (_context.Capabilities.Api == TargetApi.Vulkan)
+            if (_context.Capabilities.Api != TargetApi.OpenGL)
             {
                 if (count == 1)
                 {
@@ -103,7 +103,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         {
             int binding;
 
-            if (_context.Capabilities.Api == TargetApi.Vulkan)
+            if (_context.Capabilities.Api != TargetApi.OpenGL)
             {
                 binding = GetBindingFromIndex(index, _context.Capabilities.MaximumStorageBuffersPerStage, "Storage buffer");
             }
@@ -119,7 +119,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         {
             int binding;
 
-            if (_context.Capabilities.Api == TargetApi.Vulkan)
+            if (_context.Capabilities.Api != TargetApi.OpenGL)
             {
                 if (count == 1)
                 {

+ 11 - 0
src/Ryujinx.Graphics.Gpu/Shader/GpuChannelComputeState.cs

@@ -1,3 +1,5 @@
+using Ryujinx.Graphics.GAL;
+
 namespace Ryujinx.Graphics.Gpu.Shader
 {
     /// <summary>
@@ -61,5 +63,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
             SharedMemorySize = sharedMemorySize;
             HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
         }
+
+        /// <summary>
+        /// Packs the X, Y and Z local sizes of the shader into a GAL <see cref="ComputeSize"/>.
+        /// </summary>
+        /// <returns>Local group size</returns>
+        public ComputeSize GetLocalSize() => new(LocalSizeX, LocalSizeY, LocalSizeZ);
     }
 }

+ 14 - 7
src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs

@@ -224,7 +224,10 @@ namespace Ryujinx.Graphics.Gpu.Shader
             TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode, asCompute: false);
 
             ShaderSource[] shaderSourcesArray = new ShaderSource[] { CreateShaderSource(translatedShader.Program) };
-            ShaderInfo info = ShaderInfoBuilder.BuildForCompute(_context, translatedShader.Program.Info);
+            ShaderInfo info = ShaderInfoBuilder.BuildForCompute(
+                _context,
+                translatedShader.Program.Info,
+                computeState.GetLocalSize());
             IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, info);
 
             cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader);
@@ -425,7 +428,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
                             TranslatorContext lastInVertexPipeline = geometryToCompute ? translatorContexts[4] ?? currentStage : currentStage;
 
-                            program = lastInVertexPipeline.GenerateVertexPassthroughForCompute();
+                            (program, ShaderProgramInfo vacInfo) = lastInVertexPipeline.GenerateVertexPassthroughForCompute();
+                            infoBuilder.AddStageInfoVac(vacInfo);
                         }
                         else
                         {
@@ -530,7 +534,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         private ShaderAsCompute CreateHostVertexAsComputeProgram(ShaderProgram program, TranslatorContext context, bool tfEnabled)
         {
             ShaderSource source = new(program.Code, program.BinaryCode, ShaderStage.Compute, program.Language);
-            ShaderInfo info = ShaderInfoBuilder.BuildForVertexAsCompute(_context, program.Info, tfEnabled);
+            ShaderInfo info = ShaderInfoBuilder.BuildForVertexAsCompute(_context, program.Info, context.GetVertexAsComputeInfo(), tfEnabled);
 
             return new(_context.Renderer.CreateProgram(new[] { source }, info), program.Info, context.GetResourceReservations());
         }
@@ -822,16 +826,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
         /// <summary>
         /// Creates shader translation options with the requested graphics API and flags.
-        /// The shader language is choosen based on the current configuration and graphics API.
+        /// The shader language is chosen based on the current configuration and graphics API.
         /// </summary>
         /// <param name="api">Target graphics API</param>
         /// <param name="flags">Translation flags</param>
         /// <returns>Translation options</returns>
         private static TranslationOptions CreateTranslationOptions(TargetApi api, TranslationFlags flags)
         {
-            TargetLanguage lang = GraphicsConfig.EnableSpirvCompilationOnVulkan && api == TargetApi.Vulkan
-                ? TargetLanguage.Spirv
-                : TargetLanguage.Glsl;
+            TargetLanguage lang = api switch
+            {
+                TargetApi.OpenGL => TargetLanguage.Glsl,
+                // SPIR-V is only used on Vulkan when enabled in the configuration, GLSL otherwise.
+                TargetApi.Vulkan => GraphicsConfig.EnableSpirvCompilationOnVulkan ? TargetLanguage.Spirv : TargetLanguage.Glsl,
+                TargetApi.Metal => TargetLanguage.Msl,
+                // Keep the GLSL fallback the previous conditional had for any other value,
+                // rather than letting the switch expression throw SwitchExpressionException.
+                _ => TargetLanguage.Glsl,
+            };
 
             return new TranslationOptions(lang, api, flags);
         }

+ 35 - 16
src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs

@@ -22,6 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
             ResourceStages.Geometry;
 
         private readonly GpuContext _context;
+        private readonly ComputeSize _computeLocalSize;
 
         private int _fragmentOutputMap;
 
@@ -39,9 +40,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="context">GPU context that owns the shaders that will be added to the builder</param>
         /// <param name="tfEnabled">Indicates if the graphics shader is used with transform feedback enabled</param>
         /// <param name="vertexAsCompute">Indicates that the vertex shader will be emulated on a compute shader</param>
-        public ShaderInfoBuilder(GpuContext context, bool tfEnabled, bool vertexAsCompute = false)
+        /// <param name="computeLocalSize">Indicates the local thread size for a compute shader</param>
+        public ShaderInfoBuilder(GpuContext context, bool tfEnabled, bool vertexAsCompute = false, ComputeSize computeLocalSize = default)
         {
             _context = context;
+            _computeLocalSize = computeLocalSize;
 
             _fragmentOutputMap = -1;
 
@@ -95,7 +98,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         private void PopulateDescriptorAndUsages(ResourceStages stages, ResourceType type, int setIndex, int start, int count, bool write = false)
         {
             AddDescriptor(stages, type, setIndex, start, count);
-            AddUsage(stages, type, setIndex, start, count, write);
+            // NOTE(review): with the AddUsage call disabled, this method only adds descriptors and
+            // the "write" parameter is unused -- confirm this is intentional, then remove the dead line.
+            // AddUsage(stages, type, setIndex, start, count, write);
         }
 
         /// <summary>
@@ -159,6 +162,25 @@ namespace Ryujinx.Graphics.Gpu.Shader
             AddUsage(info.Images, stages, isImage: true);
         }
 
+        /// <summary>
+        /// Registers the resource usages of a shader stage, without adding any descriptors for it.
+        /// </summary>
+        /// <remarks>
+        /// "Vac" presumably stands for vertex as compute -- confirm against the caller.
+        /// </remarks>
+        /// <param name="info">Shader program information whose buffers, textures and images are registered</param>
+        public void AddStageInfoVac(ShaderProgramInfo info)
+        {
+            // Stages without a mapping below are registered with ResourceStages.None.
+            ResourceStages usageStages = info.Stage switch
+            {
+                ShaderStage.Compute => ResourceStages.Compute,
+                ShaderStage.Vertex => ResourceStages.Vertex,
+                ShaderStage.TessellationControl => ResourceStages.TessellationControl,
+                ShaderStage.TessellationEvaluation => ResourceStages.TessellationEvaluation,
+                ShaderStage.Geometry => ResourceStages.Geometry,
+                ShaderStage.Fragment => ResourceStages.Fragment,
+                _ => ResourceStages.None,
+            };
+
+            // Buffers first (constant, then storage), followed by textures and images.
+            AddUsage(info.CBuffers, usageStages, isStorage: false);
+            AddUsage(info.SBuffers, usageStages, isStorage: true);
+            AddUsage(info.Textures, usageStages, isImage: false);
+            AddUsage(info.Images, usageStages, isImage: true);
+        }
+
         /// <summary>
         /// Adds a resource descriptor to the list of descriptors.
         /// </summary>
@@ -361,14 +383,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             ResourceLayout resourceLayout = new(descriptors.AsReadOnly(), usages.AsReadOnly());
 
-            if (pipeline.HasValue)
-            {
-                return new ShaderInfo(_fragmentOutputMap, resourceLayout, pipeline.Value, fromCache);
-            }
-            else
-            {
-                return new ShaderInfo(_fragmentOutputMap, resourceLayout, fromCache);
-            }
+            return new ShaderInfo(_fragmentOutputMap, resourceLayout, _computeLocalSize, pipeline, fromCache);
         }
 
         /// <summary>
@@ -378,14 +393,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="programs">Shaders from the disk cache</param>
         /// <param name="pipeline">Optional pipeline for background compilation</param>
         /// <param name="tfEnabled">Indicates if the graphics shader is used with transform feedback enabled</param>
+        /// <param name="computeLocalSize">Compute local thread size</param>
         /// <returns>Shader information</returns>
         public static ShaderInfo BuildForCache(
             GpuContext context,
             IEnumerable<CachedShaderStage> programs,
             ProgramPipelineState? pipeline,
-            bool tfEnabled)
+            bool tfEnabled,
+            ComputeSize computeLocalSize)
         {
-            ShaderInfoBuilder builder = new(context, tfEnabled);
+            ShaderInfoBuilder builder = new(context, tfEnabled, computeLocalSize: computeLocalSize);
 
             foreach (CachedShaderStage program in programs)
             {
@@ -403,11 +420,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// </summary>
         /// <param name="context">GPU context that owns the shader</param>
         /// <param name="info">Compute shader information</param>
+        /// <param name="computeLocalSize">Compute local thread size</param>
         /// <param name="fromCache">True if the compute shader comes from a disk cache, false otherwise</param>
         /// <returns>Shader information</returns>
-        public static ShaderInfo BuildForCompute(GpuContext context, ShaderProgramInfo info, bool fromCache = false)
+        public static ShaderInfo BuildForCompute(GpuContext context, ShaderProgramInfo info, ComputeSize computeLocalSize, bool fromCache = false)
         {
-            ShaderInfoBuilder builder = new(context, tfEnabled: false, vertexAsCompute: false);
+            ShaderInfoBuilder builder = new(context, tfEnabled: false, vertexAsCompute: false, computeLocalSize: computeLocalSize);
 
             builder.AddStageInfo(info);
 
@@ -422,10 +440,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="tfEnabled">Indicates if the graphics shader is used with transform feedback enabled</param>
         /// <param name="fromCache">True if the compute shader comes from a disk cache, false otherwise</param>
         /// <returns>Shader information</returns>
-        public static ShaderInfo BuildForVertexAsCompute(GpuContext context, ShaderProgramInfo info, bool tfEnabled, bool fromCache = false)
+        public static ShaderInfo BuildForVertexAsCompute(GpuContext context, ShaderProgramInfo info, ShaderProgramInfo info2, bool tfEnabled, bool fromCache = false)
         {
-            ShaderInfoBuilder builder = new(context, tfEnabled, vertexAsCompute: true);
+            ShaderInfoBuilder builder = new(context, tfEnabled, vertexAsCompute: true, computeLocalSize: ComputeSize.VtgAsCompute);
 
+            builder.AddStageInfoVac(info2);
             builder.AddStageInfo(info, vertexAsCompute: true);
 
             return builder.Build(null, fromCache);

+ 146 - 0
src/Ryujinx.Graphics.Metal/Auto.cs

@@ -0,0 +1,146 @@
+using System;
+using System.Diagnostics;
+using System.Runtime.Versioning;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Metal
+{
+    // Reference-counting surface of a resource that command buffers can depend on.
+    // Implemented in this file by Auto<T> (through IAutoPrivate).
+    interface IAuto
+    {
+        // Tells whether the resource is currently registered as used by the given command buffer.
+        bool HasCommandBufferDependency(CommandBufferScoped cbs);
+
+        // Takes an additional reference on the resource.
+        void IncrementReferenceCount();
+        // Drops a reference on behalf of the command buffer with index cbIndex.
+        void DecrementReferenceCount(int cbIndex);
+        // Drops a reference that is not tied to a command buffer.
+        void DecrementReferenceCount();
+    }
+
+    // Extension of IAuto that also lets the resource register itself with a command buffer.
+    interface IAutoPrivate : IAuto
+    {
+        // Registers the resource as a dependency of the given command buffer.
+        void AddCommandBufferDependencies(CommandBufferScoped cbs);
+    }
+
+    /// <summary>
+    /// Reference-counted holder for a disposable value. The value is disposed when the count
+    /// drops to zero, and the command buffers that use it are tracked in a per-index bitmap.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class Auto<T> : IAutoPrivate, IDisposable where T : IDisposable
+    {
+        private int _referenceCount;
+        private T _value;
+
+        private readonly BitMap _cbOwnership;
+        private readonly MultiFenceHolder _waitable;
+
+        private bool _disposed;
+        private bool _destroyed;
+
+        /// <summary>
+        /// Wraps <paramref name="value"/>, starting with a single reference owned by the creator.
+        /// </summary>
+        public Auto(T value)
+        {
+            _cbOwnership = new BitMap(CommandBufferPool.MaxCommandBuffers);
+            _value = value;
+            _referenceCount = 1;
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="value"/> and associates it with a fence holder that is handed
+        /// to every command buffer that starts depending on the value.
+        /// </summary>
+        public Auto(T value, MultiFenceHolder waitable) : this(value)
+        {
+            _waitable = waitable;
+        }
+
+        /// <summary>
+        /// Records a ranged use on the fence holder (when there is one), then behaves like
+        /// <see cref="Get(CommandBufferScoped)"/>.
+        /// </summary>
+        public T Get(CommandBufferScoped cbs, int offset, int size, bool write = false)
+        {
+            _waitable?.AddBufferUse(cbs.CommandBufferIndex, offset, size, write);
+
+            return Get(cbs);
+        }
+
+        /// <summary>
+        /// Returns the wrapped value without registering any command buffer dependency.
+        /// </summary>
+        public T GetUnsafe() => _value;
+
+        /// <summary>
+        /// Returns the wrapped value, registering it with the command buffer unless it was already destroyed.
+        /// </summary>
+        public T Get(CommandBufferScoped cbs)
+        {
+            if (_destroyed)
+            {
+                return _value;
+            }
+
+            AddCommandBufferDependencies(cbs);
+
+            return _value;
+        }
+
+        /// <summary>
+        /// Checks the ownership bit of the given command buffer.
+        /// </summary>
+        public bool HasCommandBufferDependency(CommandBufferScoped cbs) => _cbOwnership.IsSet(cbs.CommandBufferIndex);
+
+        /// <summary>
+        /// Checks whether any command buffer currently owns a reference. The pool argument is not consulted.
+        /// </summary>
+        public bool HasRentedCommandBufferDependency(CommandBufferPool cbp) => _cbOwnership.AnySet();
+
+        /// <summary>
+        /// Registers this object (and its fence holder, if any) with the command buffer, at most once per command buffer.
+        /// </summary>
+        public void AddCommandBufferDependencies(CommandBufferScoped cbs)
+        {
+            // Set returns false when the bit was already set, meaning the command buffer already
+            // holds this object and nothing else has to be registered.
+            bool newlyOwned = _cbOwnership.Set(cbs.CommandBufferIndex);
+
+            if (!newlyOwned)
+            {
+                return;
+            }
+
+            if (_waitable != null)
+            {
+                cbs.AddWaitable(_waitable);
+            }
+
+            cbs.AddDependant(this);
+        }
+
+        /// <summary>
+        /// Atomically adds a reference unless the count is already zero.
+        /// </summary>
+        /// <returns>True if a reference was taken, false if the count was zero</returns>
+        public bool TryIncrementReferenceCount()
+        {
+            while (true)
+            {
+                int observed = _referenceCount;
+
+                if (observed == 0)
+                {
+                    return false;
+                }
+
+                // Retry when another thread changed the count between the read and the exchange.
+                if (Interlocked.CompareExchange(ref _referenceCount, observed + 1, observed) == observed)
+                {
+                    return true;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Adds a reference, throwing when the count was zero before the increment.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">The object was already destroyed</exception>
+        public void IncrementReferenceCount()
+        {
+            if (Interlocked.Increment(ref _referenceCount) != 1)
+            {
+                return;
+            }
+
+            // The count went from 0 to 1: undo the increment and report the misuse.
+            Interlocked.Decrement(ref _referenceCount);
+            throw new InvalidOperationException("Attempted to increment the reference count of an object that was already destroyed.");
+        }
+
+        /// <summary>
+        /// Clears the ownership bit of command buffer <paramref name="cbIndex"/> and drops one reference.
+        /// </summary>
+        public void DecrementReferenceCount(int cbIndex)
+        {
+            _cbOwnership.Clear(cbIndex);
+            DecrementReferenceCount();
+        }
+
+        /// <summary>
+        /// Drops one reference; the wrapped value is disposed when the count reaches zero.
+        /// </summary>
+        public void DecrementReferenceCount()
+        {
+            int remaining = Interlocked.Decrement(ref _referenceCount);
+
+            if (remaining == 0)
+            {
+                _value.Dispose();
+                _value = default;
+                _destroyed = true;
+            }
+
+            Debug.Assert(_referenceCount >= 0);
+        }
+
+        /// <summary>
+        /// Releases the creator's reference. Only the first call has an effect.
+        /// </summary>
+        public void Dispose()
+        {
+            if (_disposed)
+            {
+                return;
+            }
+
+            DecrementReferenceCount();
+            _disposed = true;
+        }
+    }
+}

+ 107 - 0
src/Ryujinx.Graphics.Metal/BackgroundResources.cs

@@ -0,0 +1,107 @@
+using SharpMetal.Metal;
+using System;
+using System.Collections.Generic;
+using System.Runtime.Versioning;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Lazily created command buffer pool and flush buffer, both built from the same renderer.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class BackgroundResource : IDisposable
+    {
+        private readonly MetalRenderer _renderer;
+
+        private CommandBufferPool _pool;
+        private PersistentFlushBuffer _flushBuffer;
+
+        public BackgroundResource(MetalRenderer renderer)
+        {
+            _renderer = renderer;
+        }
+
+        /// <summary>
+        /// Returns the command buffer pool, creating it on the renderer's background queue on first use.
+        /// </summary>
+        public CommandBufferPool GetPool()
+        {
+            if (_pool != null)
+            {
+                return _pool;
+            }
+
+            _pool = new CommandBufferPool(_renderer.BackgroundQueue, true);
+            _pool.Initialize(null); // TODO: Proper encoder factory for background render/compute
+
+            return _pool;
+        }
+
+        /// <summary>
+        /// Returns the flush buffer, creating it on first use.
+        /// </summary>
+        public PersistentFlushBuffer GetFlushBuffer()
+        {
+            if (_flushBuffer is null)
+            {
+                _flushBuffer = new PersistentFlushBuffer(_renderer);
+            }
+
+            return _flushBuffer;
+        }
+
+        /// <summary>
+        /// Disposes whichever of the two resources were created.
+        /// </summary>
+        public void Dispose()
+        {
+            _pool?.Dispose();
+            _flushBuffer?.Dispose();
+        }
+    }
+
+    /// <summary>
+    /// Per-thread registry of <see cref="BackgroundResource"/> instances.
+    /// All access to the registry is serialized by locking the dictionary.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class BackgroundResources : IDisposable
+    {
+        private readonly MetalRenderer _renderer;
+
+        private readonly Dictionary<Thread, BackgroundResource> _resources;
+
+        public BackgroundResources(MetalRenderer renderer)
+        {
+            _renderer = renderer;
+
+            _resources = new Dictionary<Thread, BackgroundResource>();
+        }
+
+        /// <summary>
+        /// Disposes and removes the resources of every thread that is no longer alive.
+        /// </summary>
+        private void Cleanup()
+        {
+            lock (_resources)
+            {
+                // Collect the dead threads first and remove them afterwards, so the dictionary
+                // is never mutated while it is being enumerated.
+                List<Thread> deadThreads = null;
+
+                foreach (KeyValuePair<Thread, BackgroundResource> tuple in _resources)
+                {
+                    if (!tuple.Key.IsAlive)
+                    {
+                        tuple.Value.Dispose();
+                        (deadThreads ??= new List<Thread>()).Add(tuple.Key);
+                    }
+                }
+
+                if (deadThreads != null)
+                {
+                    foreach (Thread deadThread in deadThreads)
+                    {
+                        _resources.Remove(deadThread);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns the resource associated with the calling thread, creating it if needed.
+        /// Creating a new entry first cleans up the entries of dead threads.
+        /// </summary>
+        /// <returns>The calling thread's background resource</returns>
+        public BackgroundResource Get()
+        {
+            Thread thread = Thread.CurrentThread;
+
+            lock (_resources)
+            {
+                if (!_resources.TryGetValue(thread, out BackgroundResource resource))
+                {
+                    Cleanup();
+
+                    resource = new BackgroundResource(_renderer);
+
+                    _resources[thread] = resource;
+                }
+
+                return resource;
+            }
+        }
+
+        /// <summary>
+        /// Disposes the resources of all registered threads.
+        /// </summary>
+        public void Dispose()
+        {
+            lock (_resources)
+            {
+                foreach (var resource in _resources.Values)
+                {
+                    resource.Dispose();
+                }
+            }
+        }
+    }
+}

+ 157 - 0
src/Ryujinx.Graphics.Metal/BitMap.cs

@@ -0,0 +1,157 @@
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Fixed-size bit set stored as an array of 64-bit words.
+    /// Bit <c>n</c> lives in word <c>n / 64</c> at position <c>n % 64</c>.
+    /// </summary>
+    readonly struct BitMap
+    {
+        public const int IntSize = 64;
+
+        private const int IntShift = 6;
+        private const int IntMask = IntSize - 1;
+
+        private readonly long[] _masks;
+
+        /// <summary>
+        /// Creates a bit set able to hold at least <paramref name="count"/> bits, all cleared.
+        /// </summary>
+        public BitMap(int count)
+        {
+            int wordCount = (count + IntMask) / IntSize;
+
+            _masks = new long[wordCount];
+        }
+
+        /// <summary>
+        /// Returns true when at least one bit is set.
+        /// </summary>
+        public bool AnySet()
+        {
+            foreach (long word in _masks)
+            {
+                if (word != 0)
+                {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Returns true when the given bit is set.
+        /// </summary>
+        public bool IsSet(int bit)
+        {
+            long flag = 1L << (bit & IntMask);
+
+            return (_masks[bit >> IntShift] & flag) != 0;
+        }
+
+        /// <summary>
+        /// Returns true when any bit in the inclusive range [start, end] is set.
+        /// </summary>
+        public bool IsSet(int start, int end)
+        {
+            if (start == end)
+            {
+                return IsSet(start);
+            }
+
+            int firstWord = start >> IntShift;
+            int lastWord = end >> IntShift;
+
+            // headMask keeps the bits at or above the start position of the first word,
+            // tailMask keeps the bits at or below the end position of the last word.
+            long headMask = -1L << (start & IntMask);
+            long tailMask = (long)(ulong.MaxValue >> (IntMask - (end & IntMask)));
+
+            if (firstWord == lastWord)
+            {
+                return (_masks[firstWord] & headMask & tailMask) != 0;
+            }
+
+            if ((_masks[firstWord] & headMask) != 0)
+            {
+                return true;
+            }
+
+            for (int word = firstWord + 1; word < lastWord; word++)
+            {
+                if (_masks[word] != 0)
+                {
+                    return true;
+                }
+            }
+
+            return (_masks[lastWord] & tailMask) != 0;
+        }
+
+        /// <summary>
+        /// Sets the given bit.
+        /// </summary>
+        /// <returns>True if the bit was previously clear, false if it was already set</returns>
+        public bool Set(int bit)
+        {
+            ref long word = ref _masks[bit >> IntShift];
+            long flag = 1L << (bit & IntMask);
+
+            bool alreadySet = (word & flag) != 0;
+
+            if (alreadySet)
+            {
+                return false;
+            }
+
+            word |= flag;
+
+            return true;
+        }
+
+        /// <summary>
+        /// Sets every bit in the inclusive range [start, end].
+        /// </summary>
+        public void SetRange(int start, int end)
+        {
+            if (start == end)
+            {
+                Set(start);
+                return;
+            }
+
+            int firstWord = start >> IntShift;
+            int lastWord = end >> IntShift;
+
+            long headMask = -1L << (start & IntMask);
+            long tailMask = (long)(ulong.MaxValue >> (IntMask - (end & IntMask)));
+
+            if (firstWord == lastWord)
+            {
+                _masks[firstWord] |= headMask & tailMask;
+                return;
+            }
+
+            _masks[firstWord] |= headMask;
+
+            // Words strictly between the first and the last are fully covered.
+            for (int word = firstWord + 1; word < lastWord; word++)
+            {
+                _masks[word] = -1L;
+            }
+
+            _masks[lastWord] |= tailMask;
+        }
+
+        /// <summary>
+        /// Clears the given bit.
+        /// </summary>
+        public void Clear(int bit)
+        {
+            long flag = 1L << (bit & IntMask);
+
+            _masks[bit >> IntShift] &= ~flag;
+        }
+
+        /// <summary>
+        /// Clears every bit.
+        /// </summary>
+        public void Clear()
+        {
+            System.Array.Clear(_masks, 0, _masks.Length);
+        }
+
+        /// <summary>
+        /// Zeroes whole words, from word index <paramref name="start"/> to <paramref name="end"/> inclusive.
+        /// </summary>
+        public void ClearInt(int start, int end)
+        {
+            int word = start;
+
+            while (word <= end)
+            {
+                _masks[word] = 0;
+                word++;
+            }
+        }
+    }
+}

+ 385 - 0
src/Ryujinx.Graphics.Metal/BufferHolder.cs

@@ -0,0 +1,385 @@
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.Versioning;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Owns a Metal buffer together with its CPU mapping pointer, its fence holder and
+    /// a cache of converted copies of its contents.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class BufferHolder : IDisposable
+    {
+        private CacheByRange<BufferHolder> _cachedConvertedBuffers;
+
+        /// <summary>
+        /// Size of the buffer in bytes, as given at construction.
+        /// </summary>
+        public int Size { get; }
+
+        private readonly IntPtr _map;
+        private readonly MetalRenderer _renderer;
+        private readonly Pipeline _pipeline;
+
+        private readonly MultiFenceHolder _waitable;
+        private readonly Auto<DisposableBuffer> _buffer;
+
+        private readonly ReaderWriterLockSlim _flushLock;
+        private FenceHolder _flushFence;
+        private int _flushWaiting;
+
+        /// <summary>
+        /// Wraps an existing Metal buffer.
+        /// </summary>
+        /// <param name="renderer">Renderer that owns the buffer</param>
+        /// <param name="pipeline">Pipeline used for preload command buffers and flushing on dispose</param>
+        /// <param name="buffer">Metal buffer to wrap; its contents pointer is kept as the CPU mapping</param>
+        /// <param name="size">Size of the buffer in bytes</param>
+        public BufferHolder(MetalRenderer renderer, Pipeline pipeline, MTLBuffer buffer, int size)
+        {
+            _renderer = renderer;
+            _pipeline = pipeline;
+            _map = buffer.Contents;
+            _waitable = new MultiFenceHolder(size);
+            _buffer = new Auto<DisposableBuffer>(new(buffer), _waitable);
+
+            _flushLock = new ReaderWriterLockSlim();
+
+            Size = size;
+        }
+
+        /// <summary>
+        /// Returns the reference-counted buffer without signalling any write.
+        /// </summary>
+        public Auto<DisposableBuffer> GetBuffer()
+        {
+            return _buffer;
+        }
+
+        /// <summary>
+        /// Returns the buffer; when <paramref name="isWrite"/> is true the whole buffer is signalled as written.
+        /// </summary>
+        public Auto<DisposableBuffer> GetBuffer(bool isWrite)
+        {
+            if (isWrite)
+            {
+                SignalWrite(0, Size);
+            }
+
+            return _buffer;
+        }
+
+        /// <summary>
+        /// Returns the buffer; when <paramref name="isWrite"/> is true the given range is signalled as written.
+        /// </summary>
+        public Auto<DisposableBuffer> GetBuffer(int offset, int size, bool isWrite)
+        {
+            if (isWrite)
+            {
+                SignalWrite(offset, size);
+            }
+
+            return _buffer;
+        }
+
+        /// <summary>
+        /// Drops cached converted buffers for the written range (the whole cache when the range covers the full buffer).
+        /// </summary>
+        public void SignalWrite(int offset, int size)
+        {
+            if (offset == 0 && size == Size)
+            {
+                _cachedConvertedBuffers.Clear();
+            }
+            else
+            {
+                _cachedConvertedBuffers.ClearRange(offset, size);
+            }
+        }
+
+        /// <summary>
+        /// Forgets the current flush fence, returning it with <c>Put</c> when nobody is waiting on it.
+        /// Assumes _flushLock is held as writer.
+        /// </summary>
+        private void ClearFlushFence()
+        {
+            if (_flushFence != null)
+            {
+                if (_flushWaiting == 0)
+                {
+                    _flushFence.Put();
+                }
+
+                _flushFence = null;
+            }
+        }
+
+        /// <summary>
+        /// Waits for the pending flush fence, if any.
+        /// Assumes _flushLock is held as reader on entry and returns in the same state.
+        /// </summary>
+        private void WaitForFlushFence()
+        {
+            if (_flushFence == null)
+            {
+                return;
+            }
+
+            // If storage has changed, make sure the fence has been reached so that the data is in place.
+            _flushLock.ExitReadLock();
+            _flushLock.EnterWriteLock();
+
+            // Re-check under the write lock: the fence may have been cleared while no lock was held.
+            if (_flushFence != null)
+            {
+                var fence = _flushFence;
+                Interlocked.Increment(ref _flushWaiting);
+
+                // Don't wait in the lock.
+
+                _flushLock.ExitWriteLock();
+
+                fence.Wait();
+
+                _flushLock.EnterWriteLock();
+
+                // The last waiter to finish is the one that returns the fence.
+                if (Interlocked.Decrement(ref _flushWaiting) == 0)
+                {
+                    fence.Put();
+                }
+
+                _flushFence = null;
+            }
+
+            // Swap back to the read lock so the caller sees the state it entered with.
+            _flushLock.ExitWriteLock();
+            _flushLock.EnterReadLock();
+        }
+
+        /// <summary>
+        /// Returns a pinned view over the mapped buffer data. The buffer's reference count is
+        /// incremented for the lifetime of the returned span and decremented when it is released.
+        /// </summary>
+        /// <param name="offset">Byte offset into the buffer</param>
+        /// <param name="size">Requested size in bytes (clamped to the end of the buffer)</param>
+        /// <returns>Pinned span over the requested range</returns>
+        /// <exception cref="InvalidOperationException">The buffer is not mapped</exception>
+        public PinnedSpan<byte> GetData(int offset, int size)
+        {
+            _flushLock.EnterReadLock();
+
+            // Returns with the read lock still held.
+            WaitForFlushFence();
+
+            try
+            {
+                if (_map == IntPtr.Zero)
+                {
+                    throw new InvalidOperationException("The buffer is not mapped");
+                }
+
+                Span<byte> result = GetDataStorage(offset, size);
+
+                // Need to be careful here, the buffer can't be unmapped while the data is being used.
+                _buffer.IncrementReferenceCount();
+
+                return PinnedSpan<byte>.UnsafeFromSpan(result, _buffer.DecrementReferenceCount);
+            }
+            finally
+            {
+                // Always release the read lock, including on the error paths above; leaving it held
+                // would break later lock acquisitions on this holder (e.g. the write lock in Dispose).
+                _flushLock.ExitReadLock();
+            }
+        }
+
+        /// <summary>
+        /// Returns a span directly over the mapped memory, clamped to the end of the buffer.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">The buffer is not mapped</exception>
+        public unsafe Span<byte> GetDataStorage(int offset, int size)
+        {
+            int mappingSize = Math.Min(size, Size - offset);
+
+            if (_map != IntPtr.Zero)
+            {
+                return new Span<byte>((void*)(_map + offset), mappingSize);
+            }
+
+            throw new InvalidOperationException("The buffer is not mapped.");
+        }
+
+        /// <summary>
+        /// Writes data into the buffer. Mapped buffers whose target range is not in use are written
+        /// directly; otherwise the data is uploaded through the staging buffer, or through a
+        /// temporary buffer copy when staging space cannot be obtained.
+        /// </summary>
+        /// <param name="offset">Destination byte offset</param>
+        /// <param name="data">Data to write (clamped to the end of the buffer)</param>
+        /// <param name="cbs">Optional command buffer to record the upload on</param>
+        /// <param name="allowCbsWait">Selects the <c>PushData</c> staging path (true) or the <c>TryPushData</c> path with fallback copy (false)</param>
+        public unsafe void SetData(int offset, ReadOnlySpan<byte> data, CommandBufferScoped? cbs = null, bool allowCbsWait = true)
+        {
+            int dataSize = Math.Min(data.Length, Size - offset);
+            if (dataSize == 0)
+            {
+                return;
+            }
+
+            if (_map != IntPtr.Zero)
+            {
+                // If persistently mapped, set the data directly if the buffer is not currently in use.
+                bool isRented = _buffer.HasRentedCommandBufferDependency(_renderer.CommandBufferPool);
+
+                // If the buffer is rented, take a little more time and check if the use overlaps this handle.
+                bool needsFlush = isRented && _waitable.IsBufferRangeInUse(offset, dataSize, false);
+
+                if (!needsFlush)
+                {
+                    WaitForFences(offset, dataSize);
+
+                    data[..dataSize].CopyTo(new Span<byte>((void*)(_map + offset), dataSize));
+
+                    SignalWrite(offset, dataSize);
+
+                    return;
+                }
+            }
+
+            if (cbs != null &&
+                cbs.Value.Encoders.CurrentEncoderType == EncoderType.Render &&
+                !(_buffer.HasCommandBufferDependency(cbs.Value) &&
+                  _waitable.IsBufferRangeInUse(cbs.Value.CommandBufferIndex, offset, dataSize)))
+            {
+                // If the buffer hasn't been used on the command buffer yet, try to preload the data.
+                // This avoids ending and beginning render passes on each buffer data upload.
+
+                cbs = _pipeline.GetPreloadCommandBuffer();
+            }
+
+            if (allowCbsWait)
+            {
+                _renderer.BufferManager.StagingBuffer.PushData(_renderer.CommandBufferPool, cbs, this, offset, data);
+            }
+            else
+            {
+                // Without a caller-provided command buffer, rent one for the duration of the upload.
+                bool rentCbs = cbs == null;
+                if (rentCbs)
+                {
+                    cbs = _renderer.CommandBufferPool.Rent();
+                }
+
+                if (!_renderer.BufferManager.StagingBuffer.TryPushData(cbs.Value, this, offset, data))
+                {
+                    // Need to do a slow upload.
+                    BufferHolder srcHolder = _renderer.BufferManager.Create(dataSize);
+                    srcHolder.SetDataUnchecked(0, data);
+
+                    var srcBuffer = srcHolder.GetBuffer();
+                    var dstBuffer = this.GetBuffer(true);
+
+                    Copy(cbs.Value, srcBuffer, dstBuffer, 0, offset, dataSize);
+
+                    srcHolder.Dispose();
+                }
+
+                if (rentCbs)
+                {
+                    cbs.Value.Dispose();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Copies data straight into the mapped memory without waiting for fences or signalling a write.
+        /// Does nothing when the buffer is not mapped.
+        /// </summary>
+        public unsafe void SetDataUnchecked(int offset, ReadOnlySpan<byte> data)
+        {
+            int dataSize = Math.Min(data.Length, Size - offset);
+            if (dataSize == 0)
+            {
+                return;
+            }
+
+            if (_map != IntPtr.Zero)
+            {
+                data[..dataSize].CopyTo(new Span<byte>((void*)(_map + offset), dataSize));
+            }
+        }
+
+        /// <summary>
+        /// Typed overload of <see cref="SetDataUnchecked(int, ReadOnlySpan{byte})"/>; the data is reinterpreted as bytes.
+        /// </summary>
+        public void SetDataUnchecked<T>(int offset, ReadOnlySpan<T> data) where T : unmanaged
+        {
+            SetDataUnchecked(offset, MemoryMarshal.AsBytes(data));
+        }
+
+        /// <summary>
+        /// Records a buffer-to-buffer copy on the command buffer's blit encoder.
+        /// </summary>
+        /// <param name="cbs">Command buffer to record on</param>
+        /// <param name="src">Source buffer</param>
+        /// <param name="dst">Destination buffer (registered as written)</param>
+        /// <param name="srcOffset">Source byte offset</param>
+        /// <param name="dstOffset">Destination byte offset</param>
+        /// <param name="size">Number of bytes to copy</param>
+        /// <param name="registerSrcUsage">When false, the source is fetched without registering a command buffer dependency</param>
+        public static void Copy(
+            CommandBufferScoped cbs,
+            Auto<DisposableBuffer> src,
+            Auto<DisposableBuffer> dst,
+            int srcOffset,
+            int dstOffset,
+            int size,
+            bool registerSrcUsage = true)
+        {
+            var srcBuffer = registerSrcUsage ? src.Get(cbs, srcOffset, size).Value : src.GetUnsafe().Value;
+            var dstbuffer = dst.Get(cbs, dstOffset, size, true).Value;
+
+            cbs.Encoders.EnsureBlitEncoder().CopyFromBuffer(
+                srcBuffer,
+                (ulong)srcOffset,
+                dstbuffer,
+                (ulong)dstOffset,
+                (ulong)size);
+        }
+
+        /// <summary>
+        /// Waits on all fences of this buffer's fence holder.
+        /// </summary>
+        public void WaitForFences()
+        {
+            _waitable.WaitForFences();
+        }
+
+        /// <summary>
+        /// Waits on the fences of this buffer's fence holder for the given range.
+        /// </summary>
+        public void WaitForFences(int offset, int size)
+        {
+            _waitable.WaitForFences(offset, size);
+        }
+
+        /// <summary>
+        /// Clamps <paramref name="size"/> so that the range ends within the buffer.
+        /// </summary>
+        /// <returns>False when <paramref name="offset"/> is at or past the end of the buffer</returns>
+        private bool BoundToRange(int offset, ref int size)
+        {
+            if (offset >= Size)
+            {
+                return false;
+            }
+
+            size = Math.Min(Size - offset, size);
+
+            return true;
+        }
+
+        /// <summary>
+        /// Returns a buffer holding the given range converted by the helper shader's I8-to-I16 conversion,
+        /// reusing a cached conversion when one exists for the same range.
+        /// </summary>
+        /// <returns>The converted buffer, or null when the offset is out of range</returns>
+        public Auto<DisposableBuffer> GetBufferI8ToI16(CommandBufferScoped cbs, int offset, int size)
+        {
+            if (!BoundToRange(offset, ref size))
+            {
+                return null;
+            }
+
+            var key = new I8ToI16CacheKey(_renderer);
+
+            if (!_cachedConvertedBuffers.TryGetValue(offset, size, key, out var holder))
+            {
+                // Twice the source size, rounded up to a multiple of 4 bytes.
+                holder = _renderer.BufferManager.Create((size * 2 + 3) & ~3);
+
+                _renderer.HelperShader.ConvertI8ToI16(cbs, this, holder, offset, size);
+
+                key.SetBuffer(holder.GetBuffer());
+
+                _cachedConvertedBuffers.Add(offset, size, key, holder);
+            }
+
+            return holder.GetBuffer();
+        }
+
+        /// <summary>
+        /// Returns an index buffer converted for the given topology pattern, reusing a cached
+        /// conversion when one exists for the same range, pattern and index size.
+        /// </summary>
+        /// <returns>The converted buffer, or null when the offset is out of range</returns>
+        public Auto<DisposableBuffer> GetBufferTopologyConversion(CommandBufferScoped cbs, int offset, int size, IndexBufferPattern pattern, int indexSize)
+        {
+            if (!BoundToRange(offset, ref size))
+            {
+                return null;
+            }
+
+            var key = new TopologyConversionCacheKey(_renderer, pattern, indexSize);
+
+            if (!_cachedConvertedBuffers.TryGetValue(offset, size, key, out var holder))
+            {
+                // The destination index size is always I32.
+
+                int indexCount = size / indexSize;
+
+                int convertedCount = pattern.GetConvertedCount(indexCount);
+
+                holder = _renderer.BufferManager.Create(convertedCount * 4);
+
+                _renderer.HelperShader.ConvertIndexBuffer(cbs, this, holder, pattern, indexSize, offset, indexCount);
+
+                key.SetBuffer(holder.GetBuffer());
+
+                _cachedConvertedBuffers.Add(offset, size, key, holder);
+            }
+
+            return holder.GetBuffer();
+        }
+
+        /// <summary>
+        /// Looks up a cached converted buffer for the given range and key.
+        /// </summary>
+        public bool TryGetCachedConvertedBuffer(int offset, int size, ICacheKey key, out BufferHolder holder)
+        {
+            return _cachedConvertedBuffers.TryGetValue(offset, size, key, out holder);
+        }
+
+        /// <summary>
+        /// Stores a converted buffer in the cache for the given range and key.
+        /// </summary>
+        public void AddCachedConvertedBuffer(int offset, int size, ICacheKey key, BufferHolder holder)
+        {
+            _cachedConvertedBuffers.Add(offset, size, key, holder);
+        }
+
+        /// <summary>
+        /// Attaches a dependency to the cache entry for the given range and key.
+        /// </summary>
+        public void AddCachedConvertedBufferDependency(int offset, int size, ICacheKey key, Dependency dependency)
+        {
+            _cachedConvertedBuffers.AddDependency(offset, size, key, dependency);
+        }
+
+        /// <summary>
+        /// Removes the cache entry for the given range and key.
+        /// </summary>
+        public void RemoveCachedConvertedBuffer(int offset, int size, ICacheKey key)
+        {
+            _cachedConvertedBuffers.Remove(offset, size, key);
+        }
+
+        /// <summary>
+        /// Releases the buffer reference and the conversion cache, then clears any pending flush fence.
+        /// </summary>
+        public void Dispose()
+        {
+            _pipeline.FlushCommandsIfWeightExceeding(_buffer, (ulong)Size);
+
+            _buffer.Dispose();
+            _cachedConvertedBuffers.Dispose();
+
+            _flushLock.EnterWriteLock();
+
+            ClearFlushFence();
+
+            _flushLock.ExitWriteLock();
+        }
+    }
+}

+ 237 - 0
src/Ryujinx.Graphics.Metal/BufferManager.cs

@@ -0,0 +1,237 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Handle to a temporary buffer range. Disposing it deletes the buffer through the
+    /// buffer manager unless the range was reserved (<c>isReserved</c>).
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    readonly struct ScopedTemporaryBuffer : IDisposable
+    {
+        private readonly BufferManager _bufferManager;
+        private readonly bool _isReserved;
+
+        public readonly BufferRange Range;
+        public readonly BufferHolder Holder;
+
+        public BufferHandle Handle => Range.Handle;
+        public int Offset => Range.Offset;
+
+        /// <summary>
+        /// Describes the range [offset, offset + size) of the buffer identified by <paramref name="handle"/>.
+        /// </summary>
+        public ScopedTemporaryBuffer(BufferManager bufferManager, BufferHolder holder, BufferHandle handle, int offset, int size, bool isReserved)
+        {
+            _bufferManager = bufferManager;
+            _isReserved = isReserved;
+
+            Holder = holder;
+            Range = new BufferRange(handle, offset, size);
+        }
+
+        /// <summary>
+        /// Deletes the underlying buffer when it was created for this scope; reserved ranges are left alone.
+        /// </summary>
+        public void Dispose()
+        {
+            if (_isReserved)
+            {
+                return;
+            }
+
+            _bufferManager.Delete(Range.Handle);
+        }
+    }
+
+    /// <summary>
+    /// Creates Metal buffers, maps <see cref="BufferHandle"/> values to their
+    /// <see cref="BufferHolder"/> and forwards buffer operations to the holder.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class BufferManager : IDisposable
+    {
+        private readonly IdList<BufferHolder> _buffers;
+
+        private readonly MTLDevice _device;
+        private readonly MetalRenderer _renderer;
+        private readonly Pipeline _pipeline;
+
+        /// <summary>
+        /// Number of buffers that were registered with a handle.
+        /// </summary>
+        public int BufferCount { get; private set; }
+
+        public StagingBuffer StagingBuffer { get; }
+
+        public BufferManager(MTLDevice device, MetalRenderer renderer, Pipeline pipeline)
+        {
+            _device = device;
+            _renderer = renderer;
+            _pipeline = pipeline;
+            _buffers = new IdList<BufferHolder>();
+
+            StagingBuffer = new StagingBuffer(_renderer, this);
+        }
+
+        /// <summary>
+        /// Counts the holder, stores it in the id list and returns the id as a buffer handle.
+        /// </summary>
+        private BufferHandle Register(BufferHolder holder)
+        {
+            BufferCount++;
+
+            ulong handle64 = (uint)_buffers.Add(holder);
+
+            return Unsafe.As<ulong, BufferHandle>(ref handle64);
+        }
+
+        /// <summary>
+        /// Reinterprets a buffer handle as the id list index it was created from.
+        /// </summary>
+        private static int ToIndex(BufferHandle handle)
+        {
+            return (int)Unsafe.As<BufferHandle, ulong>(ref handle);
+        }
+
+        /// <summary>
+        /// Creates a shared-storage buffer from host memory and registers it.
+        /// </summary>
+        /// <returns>The new handle, or <see cref="BufferHandle.Null"/> when buffer creation failed</returns>
+        public BufferHandle Create(nint pointer, int size)
+        {
+            // TODO: This is the wrong Metal method, we need no-copy which SharpMetal isn't giving us.
+            var buffer = _device.NewBuffer(pointer, (ulong)size, MTLResourceOptions.ResourceStorageModeShared);
+
+            if (buffer != IntPtr.Zero)
+            {
+                return Register(new BufferHolder(_renderer, _pipeline, buffer, size));
+            }
+
+            Logger.Error?.PrintMsg(LogClass.Gpu, $"Failed to create buffer with size 0x{size:X}, and pointer 0x{pointer:X}.");
+
+            return BufferHandle.Null;
+        }
+
+        /// <summary>
+        /// Creates and registers a buffer of the given size, discarding the holder.
+        /// </summary>
+        public BufferHandle CreateWithHandle(int size)
+        {
+            return CreateWithHandle(size, out _);
+        }
+
+        /// <summary>
+        /// Creates and registers a buffer of the given size.
+        /// </summary>
+        /// <returns>The new handle, or <see cref="BufferHandle.Null"/> (with a null holder) when creation failed</returns>
+        public BufferHandle CreateWithHandle(int size, out BufferHolder holder)
+        {
+            holder = Create(size);
+
+            return holder != null ? Register(holder) : BufferHandle.Null;
+        }
+
+        /// <summary>
+        /// Reserves space in the staging buffer, or creates a dedicated temporary buffer when the reservation fails.
+        /// </summary>
+        public ScopedTemporaryBuffer ReserveOrCreate(CommandBufferScoped cbs, int size)
+        {
+            StagingBufferReserved? reserved = StagingBuffer.TryReserveData(cbs, size);
+
+            if (!reserved.HasValue)
+            {
+                // Create a temporary buffer.
+                BufferHandle handle = CreateWithHandle(size, out BufferHolder holder);
+
+                return new ScopedTemporaryBuffer(this, holder, handle, 0, size, false);
+            }
+
+            StagingBufferReserved slot = reserved.Value;
+
+            return new ScopedTemporaryBuffer(this, slot.Buffer, StagingBuffer.Handle, slot.Offset, slot.Size, true);
+        }
+
+        /// <summary>
+        /// Creates an unregistered shared-storage buffer of the given size.
+        /// </summary>
+        /// <returns>The holder, or null when buffer creation failed</returns>
+        public BufferHolder Create(int size)
+        {
+            var buffer = _device.NewBuffer((ulong)size, MTLResourceOptions.ResourceStorageModeShared);
+
+            if (buffer == IntPtr.Zero)
+            {
+                Logger.Error?.PrintMsg(LogClass.Gpu, $"Failed to create buffer with size 0x{size:X}.");
+
+                return null;
+            }
+
+            return new BufferHolder(_renderer, _pipeline, buffer, size);
+        }
+
+        /// <summary>
+        /// Looks up a buffer and reports its size; yields null and a size of 0 for unknown handles.
+        /// </summary>
+        public Auto<DisposableBuffer> GetBuffer(BufferHandle handle, bool isWrite, out int size)
+        {
+            if (!TryGetBuffer(handle, out var holder))
+            {
+                size = 0;
+                return null;
+            }
+
+            size = holder.Size;
+            return holder.GetBuffer(isWrite);
+        }
+
+        /// <summary>
+        /// Looks up a buffer for a ranged access; yields null for unknown handles.
+        /// </summary>
+        public Auto<DisposableBuffer> GetBuffer(BufferHandle handle, int offset, int size, bool isWrite)
+        {
+            return TryGetBuffer(handle, out var holder) ? holder.GetBuffer(offset, size, isWrite) : null;
+        }
+
+        /// <summary>
+        /// Looks up a buffer; yields null for unknown handles.
+        /// </summary>
+        public Auto<DisposableBuffer> GetBuffer(BufferHandle handle, bool isWrite)
+        {
+            return TryGetBuffer(handle, out var holder) ? holder.GetBuffer(isWrite) : null;
+        }
+
+        /// <summary>
+        /// Forwards to <see cref="BufferHolder.GetBufferI8ToI16"/>; yields null for unknown handles.
+        /// </summary>
+        public Auto<DisposableBuffer> GetBufferI8ToI16(CommandBufferScoped cbs, BufferHandle handle, int offset, int size)
+        {
+            return TryGetBuffer(handle, out var holder) ? holder.GetBufferI8ToI16(cbs, offset, size) : null;
+        }
+
+        /// <summary>
+        /// Forwards to <see cref="BufferHolder.GetBufferTopologyConversion"/>; yields null for unknown handles.
+        /// </summary>
+        public Auto<DisposableBuffer> GetBufferTopologyConversion(CommandBufferScoped cbs, BufferHandle handle, int offset, int size, IndexBufferPattern pattern, int indexSize)
+        {
+            if (!TryGetBuffer(handle, out var holder))
+            {
+                return null;
+            }
+
+            return holder.GetBufferTopologyConversion(cbs, offset, size, pattern, indexSize);
+        }
+
+        /// <summary>
+        /// Reads buffer data; yields a default <see cref="PinnedSpan{T}"/> for unknown handles.
+        /// </summary>
+        public PinnedSpan<byte> GetData(BufferHandle handle, int offset, int size)
+        {
+            if (!TryGetBuffer(handle, out var holder))
+            {
+                return new PinnedSpan<byte>();
+            }
+
+            return holder.GetData(offset, size);
+        }
+
+        /// <summary>
+        /// Typed overload of <see cref="SetData(BufferHandle, int, ReadOnlySpan{byte}, CommandBufferScoped?)"/> without a command buffer.
+        /// </summary>
+        public void SetData<T>(BufferHandle handle, int offset, ReadOnlySpan<T> data) where T : unmanaged
+        {
+            ReadOnlySpan<byte> bytes = MemoryMarshal.Cast<T, byte>(data);
+
+            SetData(handle, offset, bytes, null);
+        }
+
+        /// <summary>
+        /// Writes data into a buffer; unknown handles are ignored.
+        /// </summary>
+        public void SetData(BufferHandle handle, int offset, ReadOnlySpan<byte> data, CommandBufferScoped? cbs)
+        {
+            if (!TryGetBuffer(handle, out var holder))
+            {
+                return;
+            }
+
+            holder.SetData(offset, data, cbs);
+        }
+
+        /// <summary>
+        /// Disposes a buffer and removes it from the id list; unknown handles are ignored.
+        /// </summary>
+        public void Delete(BufferHandle handle)
+        {
+            if (!TryGetBuffer(handle, out var holder))
+            {
+                return;
+            }
+
+            holder.Dispose();
+            _buffers.Remove(ToIndex(handle));
+        }
+
+        private bool TryGetBuffer(BufferHandle handle, out BufferHolder holder)
+        {
+            return _buffers.TryGetValue(ToIndex(handle), out holder);
+        }
+
+        /// <summary>
+        /// Disposes the staging buffer and every registered buffer.
+        /// </summary>
+        public void Dispose()
+        {
+            StagingBuffer.Dispose();
+
+            foreach (var buffer in _buffers)
+            {
+                buffer.Dispose();
+            }
+        }
+    }
+}

+ 85 - 0
src/Ryujinx.Graphics.Metal/BufferUsageBitmap.cs

@@ -0,0 +1,85 @@
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Records, per command buffer, which granularity-sized regions of a buffer were read or written.
+    /// Each command buffer owns a run of <c>_bitsPerCb</c> bits: the read bits come first and the
+    /// write bits start at <c>_writeBitOffset</c> within that run.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    internal class BufferUsageBitmap
+    {
+        private readonly BitMap _bitmap;
+        private readonly int _size;
+        private readonly int _granularity;
+        private readonly int _bits;
+        private readonly int _writeBitOffset;
+
+        private readonly int _intsPerCb;
+        private readonly int _bitsPerCb;
+
+        /// <summary>
+        /// Creates a usage bitmap covering <paramref name="size"/> bytes.
+        /// </summary>
+        /// <param name="size">Size of the tracked buffer</param>
+        /// <param name="granularity">Number of bytes represented by a single bit</param>
+        public BufferUsageBitmap(int size, int granularity)
+        {
+            _size = size;
+            _granularity = granularity;
+
+            // There are two sets of bits - one for read tracking, and the other for write.
+            int bits = (size + (granularity - 1)) / granularity;
+            _writeBitOffset = bits;
+            _bits = bits << 1;
+
+            // Round each command buffer's run up to a whole number of bitmap ints so Clear can work per int.
+            _intsPerCb = (_bits + (BitMap.IntSize - 1)) / BitMap.IntSize;
+            _bitsPerCb = _intsPerCb * BitMap.IntSize;
+
+            _bitmap = new BitMap(_bitsPerCb * CommandBufferPool.MaxCommandBuffers);
+        }
+
+        /// <summary>
+        /// Marks a range as used by a command buffer.
+        /// </summary>
+        /// <param name="cbIndex">Index of the command buffer using the range</param>
+        /// <param name="offset">Start of the used range</param>
+        /// <param name="size">Size of the used range; the part beyond the buffer end is ignored</param>
+        /// <param name="write">True to mark the write bits, false to mark the read bits</param>
+        public void Add(int cbIndex, int offset, int size, bool write)
+        {
+            if (size == 0)
+            {
+                return;
+            }
+
+            // Some usages can be out of bounds (vertex buffer on amd), so bound if necessary.
+            if (offset + size > _size)
+            {
+                size = _size - offset;
+
+                // The whole range lies past the end of the buffer. Without this check the start bit
+                // would fall outside the region being tracked and start would exceed end.
+                if (size <= 0)
+                {
+                    return;
+                }
+            }
+
+            int cbBase = cbIndex * _bitsPerCb + (write ? _writeBitOffset : 0);
+            int start = cbBase + offset / _granularity;
+            int end = cbBase + (offset + size - 1) / _granularity;
+
+            _bitmap.SetRange(start, end);
+        }
+
+        /// <summary>
+        /// Checks whether a range overlaps usage recorded for one command buffer.
+        /// </summary>
+        /// <param name="cbIndex">Index of the command buffer to check</param>
+        /// <param name="offset">Start of the range</param>
+        /// <param name="size">Size of the range; an empty range never overlaps</param>
+        /// <param name="write">True to test the write bits, false to test the read bits</param>
+        /// <returns>The result of the bitmap's range test for the matching bits</returns>
+        public bool OverlapsWith(int cbIndex, int offset, int size, bool write = false)
+        {
+            if (size == 0)
+            {
+                return false;
+            }
+
+            int cbBase = cbIndex * _bitsPerCb + (write ? _writeBitOffset : 0);
+            int start = cbBase + offset / _granularity;
+            int end = cbBase + (offset + size - 1) / _granularity;
+
+            return _bitmap.IsSet(start, end);
+        }
+
+        /// <summary>
+        /// Checks whether a range overlaps usage recorded for any command buffer.
+        /// </summary>
+        /// <param name="offset">Start of the range</param>
+        /// <param name="size">Size of the range</param>
+        /// <param name="write">True to test the write bits, false to test the read bits</param>
+        /// <returns>True as soon as one command buffer reports an overlap</returns>
+        public bool OverlapsWith(int offset, int size, bool write)
+        {
+            for (int i = 0; i < CommandBufferPool.MaxCommandBuffers; i++)
+            {
+                if (OverlapsWith(i, offset, size, write))
+                {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Clears every read and write bit recorded for a command buffer.
+        /// </summary>
+        /// <param name="cbIndex">Index of the command buffer to clear</param>
+        public void Clear(int cbIndex)
+        {
+            _bitmap.ClearInt(cbIndex * _intsPerCb, (cbIndex + 1) * _intsPerCb - 1);
+        }
+    }
+}

+ 294 - 0
src/Ryujinx.Graphics.Metal/CacheByRange.cs

@@ -0,0 +1,294 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    interface ICacheKey : IDisposable // Key stored with every CacheByRange entry; it is disposed when its entry is destroyed.
+    {
+        bool KeyEqual(ICacheKey other); // True when other should be treated as matching this key during cache lookups.
+    }
+
+    /// <summary>
+    /// Cache key that matches any other <see cref="I8ToI16CacheKey"/>; it carries no state of its own.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    struct I8ToI16CacheKey : ICacheKey
+    {
+        // TODO: Keep the renderer and the converted buffer here so that bindings can be
+        // invalidated on dispose. Intended fields:
+        // private readonly MetalRenderer _renderer;
+        // private Auto<DisposableBuffer> _buffer;
+
+        /// <summary>
+        /// Creates the key. The renderer is currently unused.
+        /// </summary>
+        /// <param name="renderer">Renderer that will own the pipeline notification once implemented</param>
+        public I8ToI16CacheKey(MetalRenderer renderer)
+        {
+            // _renderer = renderer;
+            // _buffer = null;
+        }
+
+        /// <summary>
+        /// Compares by type only.
+        /// </summary>
+        /// <param name="other">Key to compare against</param>
+        /// <returns>True when <paramref name="other"/> is also an <see cref="I8ToI16CacheKey"/></returns>
+        public readonly bool KeyEqual(ICacheKey other) => other is I8ToI16CacheKey;
+
+        /// <summary>
+        /// Currently a no-op; the buffer is not stored yet.
+        /// </summary>
+        /// <param name="buffer">Converted buffer associated with this key</param>
+        public readonly void SetBuffer(Auto<DisposableBuffer> buffer)
+        {
+            // _buffer = buffer;
+        }
+
+        /// <summary>
+        /// Currently a no-op.
+        /// </summary>
+        public readonly void Dispose()
+        {
+            // TODO: Tell pipeline buffer is dirty!
+            // _renderer.PipelineInternal.DirtyIndexBuffer(_buffer);
+        }
+    }
+
+    /// <summary>
+    /// Cache key for topology-converted buffers, distinguished by index pattern and index size.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    readonly struct TopologyConversionCacheKey : ICacheKey
+    {
+        private readonly IndexBufferPattern _pattern;
+        private readonly int _indexSize;
+
+        // TODO: Keep the renderer and the converted buffer here so that bindings can be
+        // invalidated on dispose. Intended fields:
+        // private readonly MetalRenderer _renderer;
+        // private Auto<DisposableBuffer> _buffer;
+
+        /// <summary>
+        /// Creates the key. The renderer is currently unused.
+        /// </summary>
+        /// <param name="renderer">Renderer that will own the pipeline notification once implemented</param>
+        /// <param name="pattern">Index buffer pattern the conversion was made for</param>
+        /// <param name="indexSize">Index size the conversion was made for</param>
+        public TopologyConversionCacheKey(MetalRenderer renderer, IndexBufferPattern pattern, int indexSize)
+        {
+            // _renderer = renderer;
+            // _buffer = null;
+            _indexSize = indexSize;
+            _pattern = pattern;
+        }
+
+        /// <summary>
+        /// Compares type, pattern and index size.
+        /// </summary>
+        /// <param name="other">Key to compare against</param>
+        /// <returns>True when <paramref name="other"/> is a topology conversion key with the same pattern and index size</returns>
+        public readonly bool KeyEqual(ICacheKey other)
+        {
+            if (other is not TopologyConversionCacheKey candidate)
+            {
+                return false;
+            }
+
+            return candidate._pattern == _pattern && candidate._indexSize == _indexSize;
+        }
+
+        /// <summary>
+        /// Currently a no-op; the buffer is not stored yet.
+        /// </summary>
+        /// <param name="buffer">Converted buffer associated with this key</param>
+        public void SetBuffer(Auto<DisposableBuffer> buffer)
+        {
+            // _buffer = buffer;
+        }
+
+        /// <summary>
+        /// Currently a no-op.
+        /// </summary>
+        public readonly void Dispose()
+        {
+            // TODO: Tell pipeline buffer is dirty!
+            // _renderer.PipelineInternal.DirtyVertexBuffer(_buffer);
+        }
+    }
+
+    /// <summary>
+    /// Identifies a cached converted buffer (owner, range and key) so it can be removed from its owner later.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    readonly struct Dependency
+    {
+        private readonly BufferHolder _buffer;
+        private readonly int _offset;
+        private readonly int _size;
+        private readonly ICacheKey _key;
+
+        /// <summary>
+        /// Creates a dependency on a cached converted buffer.
+        /// </summary>
+        /// <param name="buffer">Buffer holder that owns the cached entry</param>
+        /// <param name="offset">Offset of the cached range</param>
+        /// <param name="size">Size of the cached range</param>
+        /// <param name="key">Key of the cached entry</param>
+        public Dependency(BufferHolder buffer, int offset, int size, ICacheKey key)
+        {
+            (_buffer, _offset, _size, _key) = (buffer, offset, size, key);
+        }
+
+        /// <summary>
+        /// Asks the owning buffer holder to drop the cached converted buffer this dependency refers to.
+        /// </summary>
+        public void RemoveFromOwner() => _buffer.RemoveCachedConvertedBuffer(_offset, _size, _key);
+    }
+
+    /// <summary>
+    /// Cache of disposable values addressed by an exact (offset, size) range plus an <see cref="ICacheKey"/>.
+    /// </summary>
+    /// <typeparam name="T">Type of the cached value; it is disposed when its entry is destroyed</typeparam>
+    [SupportedOSPlatform("macos")]
+    struct CacheByRange<T> where T : IDisposable
+    {
+        /// <summary>
+        /// A cached value with its key and the dependencies that must be invalidated along with it.
+        /// </summary>
+        private struct Entry
+        {
+            public readonly ICacheKey Key;
+            public readonly T Value;
+            public List<Dependency> DependencyList;
+
+            public Entry(ICacheKey key, T value)
+            {
+                DependencyList = null;
+                Value = value;
+                Key = key;
+            }
+
+            /// <summary>
+            /// Removes every registered dependency from its owner, then empties the list.
+            /// </summary>
+            public readonly void InvalidateDependencies()
+            {
+                if (DependencyList == null)
+                {
+                    return;
+                }
+
+                foreach (Dependency dependent in DependencyList)
+                {
+                    dependent.RemoveFromOwner();
+                }
+
+                DependencyList.Clear();
+            }
+        }
+
+        // Buckets of entries keyed by the packed (offset, size) pair. Created lazily on first access.
+        private Dictionary<ulong, List<Entry>> _ranges;
+
+        /// <summary>
+        /// Stores a value for the given range and key.
+        /// </summary>
+        /// <param name="offset">Offset of the range</param>
+        /// <param name="size">Size of the range</param>
+        /// <param name="key">Key identifying the value within the range</param>
+        /// <param name="value">Value to cache</param>
+        public void Add(int offset, int size, ICacheKey key, T value)
+        {
+            GetEntries(offset, size).Add(new Entry(key, value));
+        }
+
+        /// <summary>
+        /// Attaches a dependency to the first entry of the range whose key matches.
+        /// Nothing is attached when no entry matches.
+        /// </summary>
+        /// <param name="offset">Offset of the range</param>
+        /// <param name="size">Size of the range</param>
+        /// <param name="key">Key of the entry to attach to</param>
+        /// <param name="dependency">Dependency to invalidate when the entry is destroyed</param>
+        public void AddDependency(int offset, int size, ICacheKey key, Dependency dependency)
+        {
+            List<Entry> bucket = GetEntries(offset, size);
+
+            for (int index = 0; index < bucket.Count; index++)
+            {
+                Entry candidate = bucket[index];
+
+                if (!candidate.Key.KeyEqual(key))
+                {
+                    continue;
+                }
+
+                if (candidate.DependencyList == null)
+                {
+                    // Entry is a struct, so the new list reference must be written back into the bucket.
+                    candidate.DependencyList = new List<Dependency>();
+                    bucket[index] = candidate;
+                }
+
+                candidate.DependencyList.Add(dependency);
+
+                return;
+            }
+        }
+
+        /// <summary>
+        /// Removes and destroys every entry of the range whose key matches,
+        /// dropping the bucket once it is empty.
+        /// </summary>
+        /// <param name="offset">Offset of the range</param>
+        /// <param name="size">Size of the range</param>
+        /// <param name="key">Key of the entries to remove</param>
+        public void Remove(int offset, int size, ICacheKey key)
+        {
+            List<Entry> bucket = GetEntries(offset, size);
+
+            int index = 0;
+
+            while (index < bucket.Count)
+            {
+                Entry candidate = bucket[index];
+
+                if (!candidate.Key.KeyEqual(key))
+                {
+                    index++;
+
+                    continue;
+                }
+
+                // Stay on the same index: the next element has shifted into this slot.
+                bucket.RemoveAt(index);
+
+                DestroyEntry(candidate);
+            }
+
+            if (bucket.Count == 0)
+            {
+                _ranges.Remove(PackRange(offset, size));
+            }
+        }
+
+        /// <summary>
+        /// Looks up the value cached for the given range and key.
+        /// </summary>
+        /// <param name="offset">Offset of the range</param>
+        /// <param name="size">Size of the range</param>
+        /// <param name="key">Key to match</param>
+        /// <param name="value">Receives the first matching value, or default when none matches</param>
+        /// <returns>True when a matching entry exists</returns>
+        public bool TryGetValue(int offset, int size, ICacheKey key, out T value)
+        {
+            List<Entry> bucket = GetEntries(offset, size);
+
+            for (int index = 0; index < bucket.Count; index++)
+            {
+                Entry candidate = bucket[index];
+
+                if (candidate.Key.KeyEqual(key))
+                {
+                    value = candidate.Value;
+
+                    return true;
+                }
+            }
+
+            value = default;
+
+            return false;
+        }
+
+        /// <summary>
+        /// Destroys every cached entry and releases the range table.
+        /// </summary>
+        public void Clear()
+        {
+            if (_ranges == null)
+            {
+                return;
+            }
+
+            foreach (List<Entry> bucket in _ranges.Values)
+            {
+                foreach (Entry cached in bucket)
+                {
+                    DestroyEntry(cached);
+                }
+            }
+
+            _ranges.Clear();
+            _ranges = null;
+        }
+
+        /// <summary>
+        /// Destroys and removes every cached range that intersects [offset, offset + size).
+        /// </summary>
+        /// <param name="offset">Start of the range to clear</param>
+        /// <param name="size">Size of the range to clear</param>
+        public readonly void ClearRange(int offset, int size)
+        {
+            if (_ranges == null || _ranges.Count == 0)
+            {
+                return;
+            }
+
+            int clearEnd = offset + size;
+
+            // Collected lazily; the table cannot be modified while it is being enumerated.
+            List<ulong> overlapping = null;
+
+            foreach (KeyValuePair<ulong, List<Entry>> pair in _ranges)
+            {
+                (int cachedOffset, int cachedSize) = UnpackRange(pair.Key);
+
+                int cachedEnd = cachedOffset + cachedSize;
+
+                if (cachedEnd <= offset || cachedOffset >= clearEnd)
+                {
+                    continue;
+                }
+
+                foreach (Entry cached in pair.Value)
+                {
+                    DestroyEntry(cached);
+                }
+
+                overlapping ??= new List<ulong>();
+                overlapping.Add(pair.Key);
+            }
+
+            if (overlapping == null)
+            {
+                return;
+            }
+
+            foreach (ulong packed in overlapping)
+            {
+                _ranges.Remove(packed);
+            }
+        }
+
+        /// <summary>
+        /// Gets the bucket for a range, creating the table and an empty bucket when they do not exist yet.
+        /// </summary>
+        private List<Entry> GetEntries(int offset, int size)
+        {
+            _ranges ??= new Dictionary<ulong, List<Entry>>();
+
+            ulong packed = PackRange(offset, size);
+
+            if (_ranges.TryGetValue(packed, out List<Entry> bucket))
+            {
+                return bucket;
+            }
+
+            bucket = new List<Entry>();
+            _ranges.Add(packed, bucket);
+
+            return bucket;
+        }
+
+        /// <summary>
+        /// Disposes the entry's key and value, then invalidates its dependencies.
+        /// </summary>
+        private static void DestroyEntry(Entry entry)
+        {
+            entry.Key.Dispose();
+            entry.Value?.Dispose();
+            entry.InvalidateDependencies();
+        }
+
+        /// <summary>
+        /// Packs a range into one value: offset in the low 32 bits, size in the high 32 bits.
+        /// </summary>
+        private static ulong PackRange(int offset, int size)
+        {
+            ulong high = (ulong)size << 32;
+
+            return (uint)offset | high;
+        }
+
+        /// <summary>
+        /// Reverses <see cref="PackRange"/>.
+        /// </summary>
+        private static (int offset, int size) UnpackRange(ulong range)
+        {
+            int offset = (int)range;
+            int size = (int)(range >> 32);
+
+            return (offset, size);
+        }
+
+        /// <summary>
+        /// Destroys every cached entry; equivalent to <see cref="Clear"/>.
+        /// </summary>
+        public void Dispose()
+        {
+            Clear();
+        }
+    }
+}

+ 170 - 0
src/Ryujinx.Graphics.Metal/CommandBufferEncoder.cs

@@ -0,0 +1,170 @@
+using Ryujinx.Graphics.Metal;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.Versioning;
+
+interface IEncoderFactory // Supplies the render and compute encoders that CommandBufferEncoder opens on demand.
+{
+    MTLRenderCommandEncoder CreateRenderCommandEncoder(); // Called by CommandBufferEncoder when a render pass must begin.
+    MTLComputeCommandEncoder CreateComputeCommandEncoder(); // Called by CommandBufferEncoder when a compute pass must begin.
+}
+
+/// <summary>
+/// Keeps track of the single encoder (blit, compute or render) that is currently open on a command buffer.
+/// </summary>
+[SupportedOSPlatform("macos")]
+class CommandBufferEncoder
+{
+    private MTLCommandBuffer _commandBuffer;
+    private IEncoderFactory _encoderFactory;
+
+    /// <summary>
+    /// Kind of the encoder that is currently open, or <see cref="EncoderType.None"/>.
+    /// </summary>
+    public EncoderType CurrentEncoderType { get; private set; } = EncoderType.None;
+
+    /// <summary>
+    /// The open encoder, or null when no pass is active.
+    /// </summary>
+    internal MTLCommandEncoder? CurrentEncoder { get; private set; }
+
+    // The three typed views below wrap the current encoder's value; they require an encoder to be open.
+    public MTLBlitCommandEncoder BlitEncoder => new(CurrentEncoder.Value);
+
+    public MTLComputeCommandEncoder ComputeEncoder => new(CurrentEncoder.Value);
+
+    public MTLRenderCommandEncoder RenderEncoder => new(CurrentEncoder.Value);
+
+    /// <summary>
+    /// Binds this tracker to a command buffer and to the factory used to open render and compute passes.
+    /// </summary>
+    /// <param name="commandBuffer">Command buffer that blit passes are opened on</param>
+    /// <param name="encoderFactory">Factory that creates render and compute encoders</param>
+    public void Initialize(MTLCommandBuffer commandBuffer, IEncoderFactory encoderFactory)
+    {
+        _encoderFactory = encoderFactory;
+        _commandBuffer = commandBuffer;
+    }
+
+    /// <summary>
+    /// Returns the open render encoder, starting a render pass first when another kind (or none) is active.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public MTLRenderCommandEncoder EnsureRenderEncoder()
+    {
+        if (CurrentEncoderType == EncoderType.Render)
+        {
+            return RenderEncoder;
+        }
+
+        return BeginRenderPass();
+    }
+
+    /// <summary>
+    /// Returns the open blit encoder, starting a blit pass first when another kind (or none) is active.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public MTLBlitCommandEncoder EnsureBlitEncoder()
+    {
+        if (CurrentEncoderType == EncoderType.Blit)
+        {
+            return BlitEncoder;
+        }
+
+        return BeginBlitPass();
+    }
+
+    /// <summary>
+    /// Returns the open compute encoder, starting a compute pass first when another kind (or none) is active.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public MTLComputeCommandEncoder EnsureComputeEncoder()
+    {
+        if (CurrentEncoderType == EncoderType.Compute)
+        {
+            return ComputeEncoder;
+        }
+
+        return BeginComputePass();
+    }
+
+    /// <summary>
+    /// Gets the render encoder only if a render pass is already open.
+    /// </summary>
+    /// <param name="encoder">Receives the open render encoder, or default</param>
+    /// <returns>True when a render pass is open</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public bool TryGetRenderEncoder(out MTLRenderCommandEncoder encoder)
+    {
+        if (CurrentEncoderType == EncoderType.Render)
+        {
+            encoder = RenderEncoder;
+            return true;
+        }
+
+        encoder = default;
+        return false;
+    }
+
+    /// <summary>
+    /// Gets the blit encoder only if a blit pass is already open.
+    /// </summary>
+    /// <param name="encoder">Receives the open blit encoder, or default</param>
+    /// <returns>True when a blit pass is open</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public bool TryGetBlitEncoder(out MTLBlitCommandEncoder encoder)
+    {
+        if (CurrentEncoderType == EncoderType.Blit)
+        {
+            encoder = BlitEncoder;
+            return true;
+        }
+
+        encoder = default;
+        return false;
+    }
+
+    /// <summary>
+    /// Gets the compute encoder only if a compute pass is already open.
+    /// </summary>
+    /// <param name="encoder">Receives the open compute encoder, or default</param>
+    /// <returns>True when a compute pass is open</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public bool TryGetComputeEncoder(out MTLComputeCommandEncoder encoder)
+    {
+        if (CurrentEncoderType == EncoderType.Compute)
+        {
+            encoder = ComputeEncoder;
+            return true;
+        }
+
+        encoder = default;
+        return false;
+    }
+
+    /// <summary>
+    /// Ends encoding on the open encoder, if any, and resets the tracker to the "no pass" state.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">An encoder is open but its recorded type is not blit, compute or render</exception>
+    public void EndCurrentPass()
+    {
+        if (CurrentEncoder == null)
+        {
+            return;
+        }
+
+        switch (CurrentEncoderType)
+        {
+            case EncoderType.Blit:
+                BlitEncoder.EndEncoding();
+                break;
+            case EncoderType.Compute:
+                ComputeEncoder.EndEncoding();
+                break;
+            case EncoderType.Render:
+                RenderEncoder.EndEncoding();
+                break;
+            default:
+                throw new InvalidOperationException();
+        }
+
+        // Only reached when one of the known encoder kinds was ended above.
+        CurrentEncoder = null;
+        CurrentEncoderType = EncoderType.None;
+    }
+
+    /// <summary>
+    /// Ends the active pass and opens a render pass through the encoder factory.
+    /// </summary>
+    private MTLRenderCommandEncoder BeginRenderPass()
+    {
+        EndCurrentPass();
+
+        MTLRenderCommandEncoder opened = _encoderFactory.CreateRenderCommandEncoder();
+
+        CurrentEncoder = opened;
+        CurrentEncoderType = EncoderType.Render;
+
+        return opened;
+    }
+
+    /// <summary>
+    /// Ends the active pass and opens a blit pass directly on the command buffer.
+    /// </summary>
+    private MTLBlitCommandEncoder BeginBlitPass()
+    {
+        EndCurrentPass();
+
+        // The pass descriptor is only needed to create the encoder; it is disposed when this method returns.
+        using var passDescriptor = new MTLBlitPassDescriptor();
+        var opened = _commandBuffer.BlitCommandEncoder(passDescriptor);
+
+        CurrentEncoder = opened;
+        CurrentEncoderType = EncoderType.Blit;
+
+        return opened;
+    }
+
+    /// <summary>
+    /// Ends the active pass and opens a compute pass through the encoder factory.
+    /// </summary>
+    private MTLComputeCommandEncoder BeginComputePass()
+    {
+        EndCurrentPass();
+
+        MTLComputeCommandEncoder opened = _encoderFactory.CreateComputeCommandEncoder();
+
+        CurrentEncoder = opened;
+        CurrentEncoderType = EncoderType.Compute;
+
+        return opened;
+    }
+}

+ 289 - 0
src/Ryujinx.Graphics.Metal/CommandBufferPool.cs

@@ -0,0 +1,289 @@
+using SharpMetal.Metal;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.Versioning;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Fixed-size pool of Metal command buffers. Slots are rented, committed when returned, and become
+    /// available again once their fence has been waited on or is found signaled.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class CommandBufferPool : IDisposable
+    {
+        /// <summary>
+        /// Number of slots in a full-size pool.
+        /// </summary>
+        public const int MaxCommandBuffers = 16;
+
+        private readonly int _totalCommandBuffers;
+
+        // _totalCommandBuffers - 1. Rent uses it as a wrap-around mask, which works because the slot count (2 or 16) is a power of two.
+        private readonly int _totalCommandBuffersMask;
+        private readonly MTLCommandQueue _queue;
+        private readonly Thread _owner;
+        private IEncoderFactory _defaultEncoderFactory;
+
+        /// <summary>
+        /// True when the calling thread is the thread that constructed this pool.
+        /// </summary>
+        public bool OwnedByCurrentThread => _owner == Thread.CurrentThread;
+
+        /// <summary>
+        /// State of one pool slot.
+        /// </summary>
+        [SupportedOSPlatform("macos")]
+        private struct ReservedCommandBuffer
+        {
+            // Set while the slot is rented out (between Rent and Return).
+            public bool InUse;
+
+            // Set once the slot has been committed, until its fence has been waited on.
+            public bool InConsumption;
+
+            // Number of times this slot has been returned (committed).
+            public int SubmissionCount;
+            public MTLCommandBuffer CommandBuffer;
+            public CommandBufferEncoder Encoders;
+            public FenceHolder Fence;
+
+            public List<IAuto> Dependants;
+            public List<MultiFenceHolder> Waitables;
+
+            /// <summary>
+            /// Creates a fresh command buffer and fence for this slot and marks the slot as in use.
+            /// </summary>
+            /// <param name="queue">Queue the command buffer is created from</param>
+            /// <param name="stateManager">Factory handed to the slot's encoder tracker</param>
+            public void Use(MTLCommandQueue queue, IEncoderFactory stateManager)
+            {
+                MTLCommandBufferDescriptor descriptor = new();
+#if DEBUG
+                descriptor.ErrorOptions = MTLCommandBufferErrorOption.EncoderExecutionStatus;
+#endif
+
+                // The descriptor is only needed to create the command buffer. Release it afterwards,
+                // otherwise one descriptor is leaked every time a command buffer is rented.
+                using (descriptor)
+                {
+                    CommandBuffer = queue.CommandBuffer(descriptor);
+                }
+
+                Fence = new FenceHolder(CommandBuffer);
+
+                Encoders.Initialize(CommandBuffer, stateManager);
+
+                InUse = true;
+            }
+
+            /// <summary>
+            /// Allocates the per-slot lists and the encoder tracker.
+            /// </summary>
+            public void Initialize()
+            {
+                Dependants = new List<IAuto>();
+                Waitables = new List<MultiFenceHolder>();
+                Encoders = new CommandBufferEncoder();
+            }
+        }
+
+        private readonly ReservedCommandBuffer[] _commandBuffers;
+
+        // Ring buffer of committed slot indices in submission order: _queuedIndexesPtr is the oldest
+        // entry and _queuedCount the number of entries.
+        private readonly int[] _queuedIndexes;
+        private int _queuedIndexesPtr;
+        private int _queuedCount;
+        private int _inUseCount;
+
+        /// <summary>
+        /// Creates a pool on the given queue, owned by the calling thread.
+        /// </summary>
+        /// <param name="queue">Queue that command buffers are created from</param>
+        /// <param name="isLight">True for a pool with 2 slots instead of <see cref="MaxCommandBuffers"/></param>
+        public CommandBufferPool(MTLCommandQueue queue, bool isLight = false)
+        {
+            _queue = queue;
+            _owner = Thread.CurrentThread;
+
+            _totalCommandBuffers = isLight ? 2 : MaxCommandBuffers;
+            _totalCommandBuffersMask = _totalCommandBuffers - 1;
+
+            _commandBuffers = new ReservedCommandBuffer[_totalCommandBuffers];
+
+            _queuedIndexes = new int[_totalCommandBuffers];
+            _queuedIndexesPtr = 0;
+            _queuedCount = 0;
+        }
+
+        /// <summary>
+        /// Stores the encoder factory and initializes every slot. Must run before the first rent.
+        /// </summary>
+        /// <param name="encoderFactory">Factory handed to each rented command buffer's encoder tracker</param>
+        public void Initialize(IEncoderFactory encoderFactory)
+        {
+            _defaultEncoderFactory = encoderFactory;
+
+            for (int i = 0; i < _totalCommandBuffers; i++)
+            {
+                _commandBuffers[i].Initialize();
+                WaitAndDecrementRef(i);
+            }
+        }
+
+        /// <summary>
+        /// Takes a reference on <paramref name="dependant"/> and ties it to a slot; the reference is
+        /// dropped when the slot is cleaned up.
+        /// </summary>
+        /// <param name="cbIndex">Index of the slot</param>
+        /// <param name="dependant">Object to keep referenced</param>
+        public void AddDependant(int cbIndex, IAuto dependant)
+        {
+            dependant.IncrementReferenceCount();
+            _commandBuffers[cbIndex].Dependants.Add(dependant);
+        }
+
+        /// <summary>
+        /// Registers a waitable on every slot that has been committed and not yet cleaned up.
+        /// </summary>
+        /// <param name="waitable">Fence holder to register</param>
+        public void AddWaitable(MultiFenceHolder waitable)
+        {
+            lock (_commandBuffers)
+            {
+                for (int i = 0; i < _totalCommandBuffers; i++)
+                {
+                    ref var entry = ref _commandBuffers[i];
+
+                    if (entry.InConsumption)
+                    {
+                        AddWaitable(i, waitable);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Registers a waitable on every slot that is currently rented out.
+        /// </summary>
+        /// <param name="waitable">Fence holder to register</param>
+        public void AddInUseWaitable(MultiFenceHolder waitable)
+        {
+            lock (_commandBuffers)
+            {
+                for (int i = 0; i < _totalCommandBuffers; i++)
+                {
+                    ref var entry = ref _commandBuffers[i];
+
+                    if (entry.InUse)
+                    {
+                        AddWaitable(i, waitable);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Adds the slot's fence to <paramref name="waitable"/> and, when the waitable accepts it,
+        /// remembers the waitable on the slot.
+        /// </summary>
+        /// <param name="cbIndex">Index of the slot</param>
+        /// <param name="waitable">Fence holder to register</param>
+        public void AddWaitable(int cbIndex, MultiFenceHolder waitable)
+        {
+            ref var entry = ref _commandBuffers[cbIndex];
+            if (waitable.AddFence(cbIndex, entry.Fence))
+            {
+                entry.Waitables.Add(waitable);
+            }
+        }
+
+        /// <summary>
+        /// Checks whether <paramref name="fence"/> belongs to a slot that is currently rented out.
+        /// </summary>
+        /// <param name="fence">Fence to look for</param>
+        /// <returns>True when a rented slot holds this fence</returns>
+        public bool IsFenceOnRentedCommandBuffer(FenceHolder fence)
+        {
+            lock (_commandBuffers)
+            {
+                for (int i = 0; i < _totalCommandBuffers; i++)
+                {
+                    ref var entry = ref _commandBuffers[i];
+
+                    if (entry.InUse && entry.Fence == fence)
+                    {
+                        return true;
+                    }
+                }
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Gets the fence currently stored in a slot.
+        /// </summary>
+        /// <param name="cbIndex">Index of the slot</param>
+        public FenceHolder GetFence(int cbIndex)
+        {
+            return _commandBuffers[cbIndex].Fence;
+        }
+
+        /// <summary>
+        /// Gets how many times a slot has been returned (committed).
+        /// </summary>
+        /// <param name="cbIndex">Index of the slot</param>
+        public int GetSubmissionCount(int cbIndex)
+        {
+            return _commandBuffers[cbIndex].SubmissionCount;
+        }
+
+        /// <summary>
+        /// Cleans up queued slots in submission order, stopping at the first one that is still being consumed.
+        /// </summary>
+        /// <param name="wait">True to block on the oldest queued slot even if its fence is not signaled yet</param>
+        /// <returns>Index of the last slot that was cleaned up, or 0 when none was</returns>
+        private int FreeConsumed(bool wait)
+        {
+            int freeEntry = 0;
+
+            while (_queuedCount > 0)
+            {
+                int index = _queuedIndexes[_queuedIndexesPtr];
+
+                ref var entry = ref _commandBuffers[index];
+
+                if (wait || !entry.InConsumption || entry.Fence.IsSignaled())
+                {
+                    WaitAndDecrementRef(index);
+
+                    // Only the first queued slot is waited on unconditionally.
+                    wait = false;
+                    freeEntry = index;
+
+                    _queuedCount--;
+                    _queuedIndexesPtr = (_queuedIndexesPtr + 1) % _totalCommandBuffers;
+                }
+                else
+                {
+                    break;
+                }
+            }
+
+            return freeEntry;
+        }
+
+        /// <summary>
+        /// Returns <paramref name="cbs"/> to the pool and immediately rents another command buffer.
+        /// </summary>
+        /// <param name="cbs">Command buffer to return</param>
+        /// <returns>The newly rented command buffer</returns>
+        public CommandBufferScoped ReturnAndRent(CommandBufferScoped cbs)
+        {
+            Return(cbs);
+            return Rent();
+        }
+
+        /// <summary>
+        /// Rents a free slot, waiting on the oldest queued slot first when every slot is rented or queued.
+        /// </summary>
+        /// <returns>Scoped handle to the rented command buffer</returns>
+        /// <exception cref="InvalidOperationException">No slot is free after cleaning up consumed ones</exception>
+        public CommandBufferScoped Rent()
+        {
+            lock (_commandBuffers)
+            {
+                // Start the search at the most recently freed slot.
+                int cursor = FreeConsumed(_inUseCount + _queuedCount == _totalCommandBuffers);
+
+                for (int i = 0; i < _totalCommandBuffers; i++)
+                {
+                    ref var entry = ref _commandBuffers[cursor];
+
+                    if (!entry.InUse && !entry.InConsumption)
+                    {
+                        entry.Use(_queue, _defaultEncoderFactory);
+
+                        _inUseCount++;
+
+                        return new CommandBufferScoped(this, entry.CommandBuffer, entry.Encoders, cursor);
+                    }
+
+                    cursor = (cursor + 1) & _totalCommandBuffersMask;
+                }
+            }
+
+            throw new InvalidOperationException($"Out of command buffers (In use: {_inUseCount}, queued: {_queuedCount}, total: {_totalCommandBuffers})");
+        }
+
+        /// <summary>
+        /// Ends the open encoder pass, commits the command buffer and queues its slot for later cleanup.
+        /// </summary>
+        /// <param name="cbs">Command buffer to return</param>
+        public void Return(CommandBufferScoped cbs)
+        {
+            // Ensure the encoder is committed.
+            cbs.Encoders.EndCurrentPass();
+
+            lock (_commandBuffers)
+            {
+                int cbIndex = cbs.CommandBufferIndex;
+
+                ref var entry = ref _commandBuffers[cbIndex];
+
+                Debug.Assert(entry.InUse);
+                Debug.Assert(entry.CommandBuffer.NativePtr == cbs.CommandBuffer.NativePtr);
+                entry.InUse = false;
+                entry.InConsumption = true;
+                entry.SubmissionCount++;
+                _inUseCount--;
+
+                var commandBuffer = entry.CommandBuffer;
+                commandBuffer.Commit();
+
+                // Append the slot at the tail of the ring buffer.
+                int ptr = (_queuedIndexesPtr + _queuedCount) % _totalCommandBuffers;
+                _queuedIndexes[ptr] = cbIndex;
+                _queuedCount++;
+            }
+        }
+
+        /// <summary>
+        /// Waits for a slot's fence if it is still being consumed, then releases everything tied to the
+        /// slot: dependant references, waitable registrations and the fence itself.
+        /// </summary>
+        /// <param name="cbIndex">Index of the slot</param>
+        private void WaitAndDecrementRef(int cbIndex)
+        {
+            ref var entry = ref _commandBuffers[cbIndex];
+
+            if (entry.InConsumption)
+            {
+                entry.Fence.Wait();
+                entry.InConsumption = false;
+            }
+
+            foreach (var dependant in entry.Dependants)
+            {
+                dependant.DecrementReferenceCount(cbIndex);
+            }
+
+            foreach (var waitable in entry.Waitables)
+            {
+                waitable.RemoveFence(cbIndex);
+                waitable.RemoveBufferUses(cbIndex);
+            }
+
+            entry.Dependants.Clear();
+            entry.Waitables.Clear();
+
+            // The fence is null for a slot that has never been rented.
+            entry.Fence?.Dispose();
+        }
+
+        /// <summary>
+        /// Waits for and cleans up every slot.
+        /// </summary>
+        public void Dispose()
+        {
+            for (int i = 0; i < _totalCommandBuffers; i++)
+            {
+                WaitAndDecrementRef(i);
+            }
+        }
+    }
+}

+ 43 - 0
src/Ryujinx.Graphics.Metal/CommandBufferScoped.cs

@@ -0,0 +1,43 @@
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Handle to a command buffer rented from a <see cref="CommandBufferPool"/>; disposing it returns the buffer to the pool.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    readonly struct CommandBufferScoped : IDisposable
+    {
+        private readonly CommandBufferPool _pool;
+
+        /// <summary>
+        /// The rented command buffer.
+        /// </summary>
+        public MTLCommandBuffer CommandBuffer { get; }
+
+        /// <summary>
+        /// Encoder tracker that belongs to the rented command buffer.
+        /// </summary>
+        public CommandBufferEncoder Encoders { get; }
+
+        /// <summary>
+        /// Index of the command buffer's slot inside the pool.
+        /// </summary>
+        public int CommandBufferIndex { get; }
+
+        /// <summary>
+        /// Wraps a rented command buffer.
+        /// </summary>
+        /// <param name="pool">Pool the command buffer was rented from</param>
+        /// <param name="commandBuffer">The rented command buffer</param>
+        /// <param name="encoders">Encoder tracker of the rented slot</param>
+        /// <param name="commandBufferIndex">Index of the rented slot</param>
+        public CommandBufferScoped(CommandBufferPool pool, MTLCommandBuffer commandBuffer, CommandBufferEncoder encoders, int commandBufferIndex)
+        {
+            CommandBufferIndex = commandBufferIndex;
+            Encoders = encoders;
+            CommandBuffer = commandBuffer;
+            _pool = pool;
+        }
+
+        /// <summary>
+        /// Ties <paramref name="dependant"/> to this command buffer's slot in the pool.
+        /// </summary>
+        public void AddDependant(IAuto dependant) => _pool.AddDependant(CommandBufferIndex, dependant);
+
+        /// <summary>
+        /// Registers <paramref name="waitable"/> on this command buffer's slot in the pool.
+        /// </summary>
+        public void AddWaitable(MultiFenceHolder waitable) => _pool.AddWaitable(CommandBufferIndex, waitable);
+
+        /// <summary>
+        /// Gets the fence the pool holds for this command buffer's slot.
+        /// </summary>
+        public FenceHolder GetFence() => _pool.GetFence(CommandBufferIndex);
+
+        /// <summary>
+        /// Returns the command buffer to its pool. Does nothing for a default-constructed handle.
+        /// </summary>
+        public void Dispose()
+        {
+            if (_pool != null)
+            {
+                _pool.Return(this);
+            }
+        }
+    }
+}

+ 41 - 0
src/Ryujinx.Graphics.Metal/Constants.cs

@@ -0,0 +1,41 @@
+namespace Ryujinx.Graphics.Metal
+{
+    static class Constants // Compile-time limits and binding indices shared across the Metal backend.
+    {
+        public const int MaxShaderStages = 5;
+        public const int MaxVertexBuffers = 16;
+        public const int MaxUniformBuffersPerStage = 18;
+        public const int MaxStorageBuffersPerStage = 16;
+        public const int MaxTexturesPerStage = 64;
+        public const int MaxImagesPerStage = 16;
+
+        public const int MaxUniformBufferBindings = MaxUniformBuffersPerStage * MaxShaderStages; // 18 * 5 = 90
+        public const int MaxStorageBufferBindings = MaxStorageBuffersPerStage * MaxShaderStages; // 16 * 5 = 80
+        public const int MaxTextureBindings = MaxTexturesPerStage * MaxShaderStages; // 64 * 5 = 320
+        public const int MaxImageBindings = MaxImagesPerStage * MaxShaderStages; // 16 * 5 = 80
+        public const int MaxColorAttachments = 8;
+        public const int MaxViewports = 16;
+        // TODO: Check this value
+        public const int MaxVertexAttributes = 31;
+
+        public const int MinResourceAlignment = 16;
+
+        // Must match constants set in shader generation
+        public const uint ZeroBufferIndex = MaxVertexBuffers; // 16: the first index after the vertex buffer slots
+        public const uint BaseSetIndex = MaxVertexBuffers + 1; // 17: the set indices below are offsets from this value
+
+        public const uint ConstantBuffersIndex = BaseSetIndex; // 17
+        public const uint StorageBuffersIndex = BaseSetIndex + 1; // 18
+        public const uint TexturesIndex = BaseSetIndex + 2; // 19
+        public const uint ImagesIndex = BaseSetIndex + 3; // 20
+
+        public const uint ConstantBuffersSetIndex = 0;
+        public const uint StorageBuffersSetIndex = 1;
+        public const uint TexturesSetIndex = 2;
+        public const uint ImagesSetIndex = 3;
+
+        public const uint MaximumBufferArgumentTableEntries = 31;
+
+        public const uint MaximumExtraSets = MaximumBufferArgumentTableEntries - ImagesIndex; // 31 - 20 = 11
+    }
+}

+ 22 - 0
src/Ryujinx.Graphics.Metal/CounterEvent.cs

@@ -0,0 +1,22 @@
+using Ryujinx.Graphics.GAL;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Placeholder counter event: it always reports that it can be reserved, and flushing or disposing it does nothing.
+    /// </summary>
+    class CounterEvent : ICounterEvent
+    {
+        /// <summary>
+        /// Whether the event is invalid. Starts out false.
+        /// </summary>
+        public bool Invalid { get; set; }
+
+        public CounterEvent()
+        {
+            Invalid = false;
+        }
+
+        /// <summary>
+        /// Always succeeds.
+        /// </summary>
+        /// <returns>True</returns>
+        public bool ReserveForHostAccess() => true;
+
+        /// <summary>
+        /// No-op.
+        /// </summary>
+        public void Flush()
+        {
+        }
+
+        /// <summary>
+        /// No-op.
+        /// </summary>
+        public void Dispose()
+        {
+        }
+    }
+}

+ 68 - 0
src/Ryujinx.Graphics.Metal/DepthStencilCache.cs

@@ -0,0 +1,68 @@
+using Ryujinx.Graphics.Metal.State;
+using SharpMetal.Metal;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// State cache that builds <see cref="MTLDepthStencilState"/> objects from <see cref="DepthStencilUid"/> descriptors.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class DepthStencilCache : StateCache<MTLDepthStencilState, DepthStencilUid, DepthStencilUid>
+    {
+        private readonly MTLDevice _device;
+
+        /// <summary>
+        /// Creates a cache that builds its states on <paramref name="device"/>.
+        /// </summary>
+        /// <param name="device">Device used to create depth stencil states</param>
+        public DepthStencilCache(MTLDevice device)
+        {
+            _device = device;
+        }
+
+        /// <summary>
+        /// The descriptor is used as its own hash.
+        /// </summary>
+        protected override DepthStencilUid GetHash(DepthStencilUid descriptor) => descriptor;
+
+        /// <summary>
+        /// Builds a depth stencil state from <paramref name="descriptor"/>. The stencil descriptors are
+        /// attached only when stencil testing is enabled, and every temporary descriptor is disposed
+        /// before returning.
+        /// </summary>
+        /// <param name="descriptor">Depth and stencil settings to translate</param>
+        /// <returns>The state created by the device</returns>
+        protected override MTLDepthStencilState CreateValue(DepthStencilUid descriptor)
+        {
+            // Both stencil descriptors are always created, front face first.
+            using var frontStencil = CreateStencilDescriptor(ref descriptor.FrontFace);
+            using var backStencil = CreateStencilDescriptor(ref descriptor.BackFace);
+
+            var depthStencil = new MTLDepthStencilDescriptor
+            {
+                DepthCompareFunction = descriptor.DepthCompareFunction,
+                DepthWriteEnabled = descriptor.DepthWriteEnabled
+            };
+
+            if (descriptor.StencilTestEnabled)
+            {
+                depthStencil.BackFaceStencil = backStencil;
+                depthStencil.FrontFaceStencil = frontStencil;
+            }
+
+            using (depthStencil)
+            {
+                return _device.NewDepthStencilState(depthStencil);
+            }
+        }
+
+        /// <summary>
+        /// Copies the settings of one stencil face into a new Metal stencil descriptor.
+        /// </summary>
+        private static MTLStencilDescriptor CreateStencilDescriptor(ref StencilUid uid)
+        {
+            return new MTLStencilDescriptor
+            {
+                StencilFailureOperation = uid.StencilFailureOperation,
+                DepthFailureOperation = uid.DepthFailureOperation,
+                DepthStencilPassOperation = uid.DepthStencilPassOperation,
+                StencilCompareFunction = uid.StencilCompareFunction,
+                ReadMask = uid.ReadMask,
+                WriteMask = uid.WriteMask
+            };
+        }
+    }
+}

+ 26 - 0
src/Ryujinx.Graphics.Metal/DisposableBuffer.cs

@@ -0,0 +1,26 @@
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Disposable wrapper around an <see cref="MTLBuffer"/>.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    readonly struct DisposableBuffer : IDisposable
+    {
+        /// <summary>
+        /// The wrapped buffer.
+        /// </summary>
+        public MTLBuffer Value { get; }
+
+        /// <summary>
+        /// Wraps <paramref name="buffer"/>.
+        /// </summary>
+        /// <param name="buffer">Buffer to wrap</param>
+        public DisposableBuffer(MTLBuffer buffer)
+        {
+            Value = buffer;
+        }
+
+        /// <summary>
+        /// Sets the buffer's purgeable state to empty and disposes it. Does nothing for a null buffer.
+        /// </summary>
+        public void Dispose()
+        {
+            bool hasBuffer = Value != IntPtr.Zero;
+
+            if (!hasBuffer)
+            {
+                return;
+            }
+
+            Value.SetPurgeableState(MTLPurgeableState.Empty);
+            Value.Dispose();
+        }
+    }
+}

+ 22 - 0
src/Ryujinx.Graphics.Metal/DisposableSampler.cs

@@ -0,0 +1,22 @@
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Disposable wrapper around a single Metal sampler state handle.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    readonly struct DisposableSampler : IDisposable
+    {
+        /// <summary>The wrapped Metal sampler state.</summary>
+        public MTLSamplerState Value { get; }
+
+        public DisposableSampler(MTLSamplerState sampler) => Value = sampler;
+
+        /// <summary>Disposes the wrapped sampler state.</summary>
+        public void Dispose() => Value.Dispose();
+    }
+}

+ 10 - 0
src/Ryujinx.Graphics.Metal/Effects/IPostProcessingEffect.cs

@@ -0,0 +1,10 @@
+using System;
+
+namespace Ryujinx.Graphics.Metal.Effects
+{
+    internal interface IPostProcessingEffect : IDisposable // Disposable contract for a post-processing effect of the Metal backend.
+    {
+        const int LocalGroupSize = 64; // NOTE(review): presumably the compute thread-group size shared by implementations - confirm against the effect shaders.
+        Texture Run(Texture view, int width, int height); // Runs the effect on `view` with the given width and height and returns a Texture (whether it is a new texture is up to the implementation).
+    }
+}

+ 18 - 0
src/Ryujinx.Graphics.Metal/Effects/IScalingFilter.cs

@@ -0,0 +1,18 @@
+using Ryujinx.Graphics.GAL;
+using System;
+
+namespace Ryujinx.Graphics.Metal.Effects
+{
+    internal interface IScalingFilter : IDisposable // Disposable contract for a scaling filter of the Metal backend.
+    {
+        float Level { get; set; } // NOTE(review): presumably the filter strength/sharpness - units and range are not visible here.
+        void Run( // Runs the filter from `view` into `destinationTexture`.
+            Texture view,
+            Texture destinationTexture,
+            Format format,
+            int width,
+            int height,
+            Extents2D source, // NOTE(review): presumably the region read from `view` - confirm.
+            Extents2D destination); // NOTE(review): presumably the region written in `destinationTexture` - confirm.
+    }
+}

+ 63 - 0
src/Ryujinx.Graphics.Metal/EncoderResources.cs

@@ -0,0 +1,63 @@
+using SharpMetal.Metal;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Lists of resources and vertex/fragment buffers collected for a render command encoder.
+    /// </summary>
+    public struct RenderEncoderBindings
+    {
+        public List<Resource> Resources;
+        public List<BufferResource> VertexBuffers;
+        public List<BufferResource> FragmentBuffers;
+
+        /// <summary>Creates the bindings with three empty lists.</summary>
+        public RenderEncoderBindings()
+        {
+            Resources = new List<Resource>();
+            VertexBuffers = new List<BufferResource>();
+            FragmentBuffers = new List<BufferResource>();
+        }
+
+        /// <summary>Empties all three lists while keeping the list instances.</summary>
+        public readonly void Clear()
+        {
+            FragmentBuffers.Clear();
+            VertexBuffers.Clear();
+            Resources.Clear();
+        }
+    }
+
+    /// <summary>
+    /// Lists of resources and buffers collected for a compute command encoder.
+    /// </summary>
+    public struct ComputeEncoderBindings
+    {
+        public List<Resource> Resources;
+        public List<BufferResource> Buffers;
+
+        /// <summary>Creates the bindings with two empty lists.</summary>
+        public ComputeEncoderBindings()
+        {
+            Resources = new List<Resource>();
+            Buffers = new List<BufferResource>();
+        }
+
+        /// <summary>Empties both lists while keeping the list instances.</summary>
+        public readonly void Clear()
+        {
+            Buffers.Clear();
+            Resources.Clear();
+        }
+    }
+
+    /// <summary>
+    /// A Metal buffer together with the offset and binding index it is set at.
+    /// </summary>
+    public struct BufferResource
+    {
+        public MTLBuffer Buffer;
+        public ulong Offset;
+        public ulong Binding;
+
+        public BufferResource(MTLBuffer buffer, ulong offset, ulong binding)
+        {
+            (Buffer, Offset, Binding) = (buffer, offset, binding);
+        }
+    }
+
+    /// <summary>
+    /// A Metal resource together with the usage and render stages it is declared with.
+    /// </summary>
+    public struct Resource
+    {
+        public MTLResource MtlResource;
+        public MTLResourceUsage ResourceUsage;
+        public MTLRenderStages Stages;
+
+        public Resource(MTLResource resource, MTLResourceUsage resourceUsage, MTLRenderStages stages)
+        {
+            (MtlResource, ResourceUsage, Stages) = (resource, resourceUsage, stages);
+        }
+    }
+}

+ 206 - 0
src/Ryujinx.Graphics.Metal/EncoderState.cs

@@ -0,0 +1,206 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Metal.State;
+using Ryujinx.Graphics.Shader;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    [Flags]
+    enum DirtyFlags // Bit flags naming the pieces of encoder state that can be marked as needing a rebind.
+    {
+        None = 0,
+        RenderPipeline = 1 << 0,
+        ComputePipeline = 1 << 1,
+        DepthStencil = 1 << 2,
+        DepthClamp = 1 << 3,
+        DepthBias = 1 << 4,
+        CullMode = 1 << 5,
+        FrontFace = 1 << 6,
+        StencilRef = 1 << 7,
+        Viewports = 1 << 8,
+        Scissors = 1 << 9,
+        Uniforms = 1 << 10,
+        Storages = 1 << 11,
+        Textures = 1 << 12,
+        Images = 1 << 13,
+
+        ArgBuffers = Uniforms | Storages | Textures | Images, // Union of the four resource-binding flags.
+
+        RenderAll = RenderPipeline | DepthStencil | DepthClamp | DepthBias | CullMode | FrontFace | StencilRef | Viewports | Scissors | ArgBuffers, // Every flag except ComputePipeline.
+        ComputeAll = ComputePipeline | ArgBuffers, // Compute pipeline plus the resource-binding flags.
+        All = RenderAll | ComputeAll, // Every flag defined above.
+    }
+
+    /// <summary>
+    /// Reference to a buffer, optionally paired with a range.
+    /// </summary>
+    record struct BufferRef
+    {
+        public Auto<DisposableBuffer> Buffer;
+        public BufferRange? Range;
+
+        /// <summary>Creates a reference with no range (<see cref="Range"/> stays null).</summary>
+        public BufferRef(Auto<DisposableBuffer> buffer) => Buffer = buffer;
+
+        /// <summary>Creates a reference that also stores a copy of <paramref name="range"/>.</summary>
+        public BufferRef(Auto<DisposableBuffer> buffer, ref BufferRange range)
+        {
+            Range = range;
+            Buffer = buffer;
+        }
+    }
+
+    /// <summary>
+    /// Reference to a texture and sampler for a shader stage.
+    /// </summary>
+    record struct TextureRef
+    {
+        public ShaderStage Stage;
+        public TextureBase Storage;
+        public Auto<DisposableSampler> Sampler;
+        public Format ImageFormat;
+
+        /// <summary>Sets stage, storage and sampler; <see cref="ImageFormat"/> keeps its default value.</summary>
+        public TextureRef(ShaderStage stage, TextureBase storage, Auto<DisposableSampler> sampler)
+        {
+            (Stage, Storage, Sampler) = (stage, storage, sampler);
+        }
+    }
+
+    /// <summary>
+    /// Reference to an image texture for a shader stage.
+    /// </summary>
+    record struct ImageRef
+    {
+        public ShaderStage Stage;
+        public Texture Storage;
+
+        public ImageRef(ShaderStage stage, Texture storage)
+        {
+            (Stage, Storage) = (stage, storage);
+        }
+    }
+
+    struct PredrawState // Snapshot of cull mode, depth-stencil UID, topology and viewports, filled by SavePredrawState and consumed by RestorePredrawState.
+    {
+        public MTLCullMode CullMode;
+        public DepthStencilUid DepthStencilUid;
+        public PrimitiveTopology Topology;
+        public MTLViewport[] Viewports; // SavePredrawState stores a copy of the array here, not the live array.
+    }
+
+    struct RenderTargetCopy // Scissor and attachment references returned by EncoderState.InheritForClear and accepted by EncoderState.Restore.
+    {
+        public MTLScissorRect[] Scissors; // Array reference, not a copy of its elements.
+        public Texture DepthStencil;
+        public Texture[] RenderTargets; // Array reference, not a copy of its elements.
+    }
+
+    /// <summary>
+    /// Mutable bag of the state tracked for command encoding: bound programs, pipeline
+    /// description, resource references, fixed-function state, attachments and dirty flags.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class EncoderState
+    {
+        public Program RenderProgram = null;
+        public Program ComputeProgram = null;
+
+        public PipelineState Pipeline;
+        public DepthStencilUid DepthStencilUid;
+
+        /// <summary>Pairs an array-like binding object with the shader stage it belongs to.</summary>
+        public readonly record struct ArrayRef<T>(ShaderStage Stage, T Array);
+
+        public readonly BufferRef[] UniformBufferRefs = new BufferRef[Constants.MaxUniformBufferBindings];
+        public readonly BufferRef[] StorageBufferRefs = new BufferRef[Constants.MaxStorageBufferBindings];
+        public readonly TextureRef[] TextureRefs = new TextureRef[Constants.MaxTextureBindings * 2];
+        public readonly ImageRef[] ImageRefs = new ImageRef[Constants.MaxImageBindings * 2];
+
+        public ArrayRef<TextureArray>[] TextureArrayRefs = [];
+        public ArrayRef<ImageArray>[] ImageArrayRefs = [];
+
+        public ArrayRef<TextureArray>[] TextureArrayExtraRefs = [];
+        public ArrayRef<ImageArray>[] ImageArrayExtraRefs = [];
+
+        public IndexBufferState IndexBuffer = default;
+
+        public MTLDepthClipMode DepthClipMode = MTLDepthClipMode.Clip;
+
+        public float DepthBias;
+        public float SlopeScale;
+        public float Clamp;
+
+        public int BackRefValue = 0;
+        public int FrontRefValue = 0;
+
+        public PrimitiveTopology Topology = PrimitiveTopology.Triangles;
+        public MTLCullMode CullMode = MTLCullMode.None;
+        public MTLWinding Winding = MTLWinding.CounterClockwise;
+        public bool CullBoth = false;
+
+        public MTLViewport[] Viewports = new MTLViewport[Constants.MaxViewports];
+        public MTLScissorRect[] Scissors = new MTLScissorRect[Constants.MaxViewports];
+
+        // Changing attachments requires recreation!
+        public Texture DepthStencil;
+        public Texture[] RenderTargets = new Texture[Constants.MaxColorAttachments];
+        public ITexture PreMaskDepthStencil = default;
+        public ITexture[] PreMaskRenderTargets;
+        public bool FramebufferUsingColorWriteMask;
+
+        public Array8<ColorBlendStateUid> StoredBlend;
+        public ColorF BlendColor = new();
+
+        public readonly VertexBufferState[] VertexBuffers = new VertexBufferState[Constants.MaxVertexBuffers];
+        public readonly VertexAttribDescriptor[] VertexAttribs = new VertexAttribDescriptor[Constants.MaxVertexAttributes];
+        // Dirty flags
+        public DirtyFlags Dirty = DirtyFlags.None;
+
+        // Only to be used for present
+        public bool ClearLoadAction = false;
+
+        public RenderEncoderBindings RenderEncoderBindings = new();
+        public ComputeEncoderBindings ComputeEncoderBindings = new();
+
+        /// <summary>
+        /// Initializes the pipeline description and sets the depth compare function to Always.
+        /// </summary>
+        public EncoderState()
+        {
+            Pipeline.Initialize();
+            DepthStencilUid.DepthCompareFunction = MTLCompareFunction.Always;
+        }
+
+        /// <summary>
+        /// Takes over scissors, attachments, colour blend state and depth-stencil format from
+        /// <paramref name="other"/>, then masks out colour writes that the clear must not touch.
+        /// </summary>
+        /// <param name="other">State to inherit render target information from.</param>
+        /// <param name="depth">When true, colour writes are disabled on every attachment.</param>
+        /// <param name="singleIndex">When not -1, colour writes stay enabled only for this attachment index.</param>
+        /// <returns>This state's scissors and attachments as they were before inheriting, for <see cref="Restore"/>.</returns>
+        public RenderTargetCopy InheritForClear(EncoderState other, bool depth, int singleIndex = -1)
+        {
+            // Inherit render target related information without causing a render encoder split.
+
+            // Snapshot THIS state's values before they are overwritten. Capturing other's values
+            // would make Restore a no-op and leave this state aliasing other's arrays.
+            var oldState = new RenderTargetCopy
+            {
+                Scissors = Scissors,
+                RenderTargets = RenderTargets,
+                DepthStencil = DepthStencil
+            };
+
+            Scissors = other.Scissors;
+            RenderTargets = other.RenderTargets;
+            DepthStencil = other.DepthStencil;
+
+            Pipeline.ColorBlendAttachmentStateCount = other.Pipeline.ColorBlendAttachmentStateCount;
+            Pipeline.Internal.ColorBlendState = other.Pipeline.Internal.ColorBlendState;
+            Pipeline.DepthStencilFormat = other.Pipeline.DepthStencilFormat;
+
+            ref var blendStates = ref Pipeline.Internal.ColorBlendState;
+
+            // Mask out irrelevant attachments.
+            for (int i = 0; i < blendStates.Length; i++)
+            {
+                if (depth || (singleIndex != -1 && singleIndex != i))
+                {
+                    blendStates[i].WriteMask = MTLColorWriteMask.None;
+                }
+            }
+
+            return oldState;
+        }
+
+        /// <summary>
+        /// Puts back the scissors and attachments captured by <see cref="InheritForClear"/>
+        /// and resets the pipeline's colour state.
+        /// </summary>
+        /// <param name="copy">Snapshot returned by <see cref="InheritForClear"/>.</param>
+        public void Restore(RenderTargetCopy copy)
+        {
+            Scissors = copy.Scissors;
+            RenderTargets = copy.RenderTargets;
+            DepthStencil = copy.DepthStencil;
+
+            Pipeline.Internal.ResetColorState();
+        }
+    }
+}

+ 1788 - 0
src/Ryujinx.Graphics.Metal/EncoderStateManager.cs

@@ -0,0 +1,1788 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Metal.State;
+using Ryujinx.Graphics.Shader;
+using SharpMetal.Metal;
+using System;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Versioning;
+using BufferAssignment = Ryujinx.Graphics.GAL.BufferAssignment;
+
+namespace Ryujinx.Graphics.Metal
+{
+    [SupportedOSPlatform("macos")]
+    struct EncoderStateManager : IDisposable
+    {
+        private const int ArrayGrowthSize = 16; // NOTE(review): not used in the visible part of the file - presumably the growth step for binding arrays; confirm.
+
+        private readonly MTLDevice _device;
+        private readonly Pipeline _pipeline;
+        private readonly BufferManager _bufferManager;
+
+        private readonly DepthStencilCache _depthStencilCache;
+        private readonly MTLDepthStencilState _defaultState; // Obtained in the constructor from the cache for the main state's initial DepthStencilUid.
+
+        private readonly EncoderState _mainState = new(); // State used whenever SwapState is given null.
+        private EncoderState _currentState; // State all accessors and updates below operate on.
+
+        public readonly IndexBufferState IndexBuffer => _currentState.IndexBuffer;
+        public readonly PrimitiveTopology Topology => _currentState.Topology;
+        public readonly Texture[] RenderTargets => _currentState.RenderTargets;
+        public readonly Texture DepthStencil => _currentState.DepthStencil;
+        public readonly ComputeSize ComputeLocalSize => _currentState.ComputeProgram.ComputeLocalSize; // Dereferences ComputeProgram without a null check.
+
+        // RGBA32F is the biggest format
+        private const int ZeroBufferSize = 4 * 4;
+        private readonly BufferHandle _zeroBuffer; // Created in the constructor from ZeroBufferSize zero bytes.
+
+        /// <summary>
+        /// Stores the dependencies, selects the main state as current, fetches the default
+        /// depth-stencil state from the cache and creates the zero-filled buffer.
+        /// </summary>
+        public unsafe EncoderStateManager(MTLDevice device, BufferManager bufferManager, Pipeline pipeline)
+        {
+            _bufferManager = bufferManager;
+            _pipeline = pipeline;
+            _device = device;
+
+            // Start out on the main state.
+            _currentState = _mainState;
+
+            _depthStencilCache = new DepthStencilCache(device);
+            _defaultState = _depthStencilCache.GetOrCreate(_currentState.DepthStencilUid);
+
+            // Zero buffer
+            byte[] zeroData = new byte[ZeroBufferSize];
+
+            fixed (byte* zeroPtr = zeroData)
+            {
+                _zeroBuffer = _bufferManager.Create((IntPtr)zeroPtr, ZeroBufferSize);
+            }
+        }
+
+        /// <summary>Disposes the depth-stencil state cache.</summary>
+        public readonly void Dispose() => _depthStencilCache.Dispose();
+
+        /// <summary>Adds <paramref name="flags"/> to the current state's dirty flags.</summary>
+        private readonly void SignalDirty(DirtyFlags flags) => _currentState.Dirty |= flags;
+
+        /// <summary>Marks every render-related flag dirty on the current state.</summary>
+        public readonly void SignalRenderDirty() => SignalDirty(DirtyFlags.RenderAll);
+
+        /// <summary>Marks every compute-related flag dirty on the current state.</summary>
+        public readonly void SignalComputeDirty() => SignalDirty(DirtyFlags.ComputeAll);
+
+        /// <summary>
+        /// Makes <paramref name="state"/> the current state (the main state when null) and marks
+        /// <paramref name="flags"/> dirty on it.
+        /// </summary>
+        /// <returns>Always the main state, regardless of which state was current before.</returns>
+        public EncoderState SwapState(EncoderState state, DirtyFlags flags = DirtyFlags.All)
+        {
+            if (state == null)
+            {
+                _currentState = _mainState;
+            }
+            else
+            {
+                _currentState = state;
+            }
+
+            SignalDirty(flags);
+
+            return _mainState;
+        }
+
+        /// <summary>
+        /// Captures cull mode, depth-stencil UID, topology and a copy of the viewports
+        /// from the current state.
+        /// </summary>
+        public PredrawState SavePredrawState()
+        {
+            EncoderState source = _currentState;
+
+            PredrawState snapshot = default;
+            snapshot.CullMode = source.CullMode;
+            snapshot.DepthStencilUid = source.DepthStencilUid;
+            snapshot.Topology = source.Topology;
+            snapshot.Viewports = source.Viewports.ToArray();
+
+            return snapshot;
+        }
+
+        /// <summary>
+        /// Writes a previously saved snapshot back into the current state and marks cull mode,
+        /// depth-stencil and viewports dirty.
+        /// </summary>
+        public readonly void RestorePredrawState(PredrawState state)
+        {
+            EncoderState target = _currentState;
+
+            target.Viewports = state.Viewports;
+            target.Topology = state.Topology;
+            target.DepthStencilUid = state.DepthStencilUid;
+            target.CullMode = state.CullMode;
+
+            const DirtyFlags Restored = DirtyFlags.CullMode | DirtyFlags.DepthStencil | DirtyFlags.Viewports;
+
+            SignalDirty(Restored);
+        }
+
+        /// <summary>Sets whether colour attachments of new render passes use the Clear load action.</summary>
+        public readonly void SetClearLoadAction(bool clear) => _currentState.ClearLoadAction = clear;
+
+        /// <summary>Marks the texture bindings dirty on the current state.</summary>
+        public readonly void DirtyTextures() => SignalDirty(DirtyFlags.Textures);
+
+        /// <summary>Marks the image bindings dirty on the current state.</summary>
+        public readonly void DirtyImages() => SignalDirty(DirtyFlags.Images);
+
+        /// <summary>
+        /// Builds a render pass descriptor from the current colour and depth/stencil
+        /// attachments and opens a render command encoder on the pipeline's command buffer.
+        /// </summary>
+        /// <returns>The newly created render command encoder.</returns>
+        public readonly MTLRenderCommandEncoder CreateRenderCommandEncoder()
+        {
+            EncoderState state = _currentState;
+
+            // Initialise Pass & State
+            using var passDescriptor = new MTLRenderPassDescriptor();
+
+            for (int slot = 0; slot < Constants.MaxColorAttachments; slot++)
+            {
+                if (state.RenderTargets[slot] is not Texture target)
+                {
+                    continue;
+                }
+
+                var colorAttachment = passDescriptor.ColorAttachments.Object((ulong)slot);
+                target.PopulateRenderPassAttachment(colorAttachment);
+                colorAttachment.LoadAction = state.ClearLoadAction ? MTLLoadAction.Clear : MTLLoadAction.Load;
+                colorAttachment.StoreAction = MTLStoreAction.Store;
+            }
+
+            var depthAttachment = passDescriptor.DepthAttachment;
+            var stencilAttachment = passDescriptor.StencilAttachment;
+
+            if (state.DepthStencil != null)
+            {
+                var handle = state.DepthStencil.GetHandle();
+                var format = handle.PixelFormat;
+
+                // Combined formats feed both the depth and the stencil attachment.
+                bool combined = format is MTLPixelFormat.Depth24UnormStencil8 or MTLPixelFormat.Depth32FloatStencil8;
+                bool hasDepth = combined || format is MTLPixelFormat.Depth16Unorm or MTLPixelFormat.Depth32Float;
+                bool hasStencil = combined || format is MTLPixelFormat.Stencil8;
+
+                if (hasDepth)
+                {
+                    depthAttachment.Texture = handle;
+                    depthAttachment.LoadAction = MTLLoadAction.Load;
+                    depthAttachment.StoreAction = MTLStoreAction.Store;
+                }
+
+                if (hasStencil)
+                {
+                    stencilAttachment.Texture = handle;
+                    stencilAttachment.LoadAction = MTLLoadAction.Load;
+                    stencilAttachment.StoreAction = MTLStoreAction.Store;
+                }
+
+                if (!hasDepth && !hasStencil)
+                {
+                    Logger.Error?.PrintMsg(LogClass.Gpu, $"Unsupported Depth/Stencil Format: {format}!");
+                }
+            }
+
+            // Initialise Encoder
+            return _pipeline.CommandBuffer.RenderCommandEncoder(passDescriptor);
+        }
+
+        /// <summary>
+        /// Opens a compute command encoder on the pipeline's command buffer using a
+        /// default-constructed compute pass descriptor.
+        /// </summary>
+        public readonly MTLComputeCommandEncoder CreateComputeCommandEncoder()
+        {
+            using var passDescriptor = new MTLComputePassDescriptor();
+
+            return _pipeline.CommandBuffer.ComputeCommandEncoder(passDescriptor);
+        }
+
+        /// <summary>
+        /// Clears the render encoder bindings and refills them for every dirty category:
+        /// vertex buffers (when the render pipeline is dirty), uniforms, storages, textures and images.
+        /// </summary>
+        public readonly void RenderResourcesPrepass()
+        {
+            EncoderState state = _currentState;
+
+            state.RenderEncoderBindings.Clear();
+
+            if ((state.Dirty & DirtyFlags.RenderPipeline) != DirtyFlags.None)
+            {
+                SetVertexBuffers(state.VertexBuffers, ref state.RenderEncoderBindings);
+            }
+
+            if ((state.Dirty & DirtyFlags.Uniforms) != DirtyFlags.None)
+            {
+                UpdateAndBind(state.RenderProgram, Constants.ConstantBuffersSetIndex, ref state.RenderEncoderBindings);
+            }
+
+            if ((state.Dirty & DirtyFlags.Storages) != DirtyFlags.None)
+            {
+                UpdateAndBind(state.RenderProgram, Constants.StorageBuffersSetIndex, ref state.RenderEncoderBindings);
+            }
+
+            if ((state.Dirty & DirtyFlags.Textures) != DirtyFlags.None)
+            {
+                UpdateAndBind(state.RenderProgram, Constants.TexturesSetIndex, ref state.RenderEncoderBindings);
+            }
+
+            if ((state.Dirty & DirtyFlags.Images) != DirtyFlags.None)
+            {
+                UpdateAndBind(state.RenderProgram, Constants.ImagesSetIndex, ref state.RenderEncoderBindings);
+            }
+        }
+
+        /// <summary>
+        /// Clears the compute encoder bindings and refills them for every dirty category:
+        /// uniforms, storages, textures and images.
+        /// </summary>
+        public readonly void ComputeResourcesPrepass()
+        {
+            EncoderState state = _currentState;
+
+            state.ComputeEncoderBindings.Clear();
+
+            if ((state.Dirty & DirtyFlags.Uniforms) != DirtyFlags.None)
+            {
+                UpdateAndBind(state.ComputeProgram, Constants.ConstantBuffersSetIndex, ref state.ComputeEncoderBindings);
+            }
+
+            if ((state.Dirty & DirtyFlags.Storages) != DirtyFlags.None)
+            {
+                UpdateAndBind(state.ComputeProgram, Constants.StorageBuffersSetIndex, ref state.ComputeEncoderBindings);
+            }
+
+            if ((state.Dirty & DirtyFlags.Textures) != DirtyFlags.None)
+            {
+                UpdateAndBind(state.ComputeProgram, Constants.TexturesSetIndex, ref state.ComputeEncoderBindings);
+            }
+
+            if ((state.Dirty & DirtyFlags.Images) != DirtyFlags.None)
+            {
+                UpdateAndBind(state.ComputeProgram, Constants.ImagesSetIndex, ref state.ComputeEncoderBindings);
+            }
+        }
+
+        /// <summary>
+        /// Applies every dirty piece of render state to the encoder, replays the collected
+        /// resource and buffer bindings, then clears all render-related dirty flags.
+        /// </summary>
+        /// <param name="renderCommandEncoder">Encoder that receives the state.</param>
+        public void RebindRenderState(MTLRenderCommandEncoder renderCommandEncoder)
+        {
+            if ((_currentState.Dirty & DirtyFlags.RenderPipeline) != DirtyFlags.None)
+            {
+                SetRenderPipelineState(renderCommandEncoder);
+            }
+
+            if ((_currentState.Dirty & DirtyFlags.DepthStencil) != DirtyFlags.None)
+            {
+                SetDepthStencilState(renderCommandEncoder);
+            }
+
+            if ((_currentState.Dirty & DirtyFlags.DepthClamp) != DirtyFlags.None)
+            {
+                SetDepthClamp(renderCommandEncoder);
+            }
+
+            if ((_currentState.Dirty & DirtyFlags.DepthBias) != DirtyFlags.None)
+            {
+                SetDepthBias(renderCommandEncoder);
+            }
+
+            if ((_currentState.Dirty & DirtyFlags.CullMode) != DirtyFlags.None)
+            {
+                SetCullMode(renderCommandEncoder);
+            }
+
+            if ((_currentState.Dirty & DirtyFlags.FrontFace) != DirtyFlags.None)
+            {
+                SetFrontFace(renderCommandEncoder);
+            }
+
+            if ((_currentState.Dirty & DirtyFlags.StencilRef) != DirtyFlags.None)
+            {
+                SetStencilRefValue(renderCommandEncoder);
+            }
+
+            if ((_currentState.Dirty & DirtyFlags.Viewports) != DirtyFlags.None)
+            {
+                SetViewports(renderCommandEncoder);
+            }
+
+            if ((_currentState.Dirty & DirtyFlags.Scissors) != DirtyFlags.None)
+            {
+                SetScissors(renderCommandEncoder);
+            }
+
+            // Replay the bindings gathered by the prepass.
+            foreach (var used in _currentState.RenderEncoderBindings.Resources)
+            {
+                renderCommandEncoder.UseResource(used.MtlResource, used.ResourceUsage, used.Stages);
+            }
+
+            foreach (var vertexBuffer in _currentState.RenderEncoderBindings.VertexBuffers)
+            {
+                renderCommandEncoder.SetVertexBuffer(vertexBuffer.Buffer, vertexBuffer.Offset, vertexBuffer.Binding);
+            }
+
+            foreach (var fragmentBuffer in _currentState.RenderEncoderBindings.FragmentBuffers)
+            {
+                renderCommandEncoder.SetFragmentBuffer(fragmentBuffer.Buffer, fragmentBuffer.Offset, fragmentBuffer.Binding);
+            }
+
+            _currentState.Dirty &= ~DirtyFlags.RenderAll;
+        }
+
+        /// <summary>
+        /// Applies the compute pipeline when dirty, replays the collected resource and buffer
+        /// bindings, then clears all compute-related dirty flags.
+        /// </summary>
+        /// <param name="computeCommandEncoder">Encoder that receives the state.</param>
+        public readonly void RebindComputeState(MTLComputeCommandEncoder computeCommandEncoder)
+        {
+            EncoderState state = _currentState;
+
+            if ((state.Dirty & DirtyFlags.ComputePipeline) != DirtyFlags.None)
+            {
+                SetComputePipelineState(computeCommandEncoder);
+            }
+
+            foreach (var used in state.ComputeEncoderBindings.Resources)
+            {
+                computeCommandEncoder.UseResource(used.MtlResource, used.ResourceUsage);
+            }
+
+            foreach (var bound in state.ComputeEncoderBindings.Buffers)
+            {
+                computeCommandEncoder.SetBuffer(bound.Buffer, bound.Offset, bound.Binding);
+            }
+
+            state.Dirty &= ~DirtyFlags.ComputeAll;
+        }
+
+        /// <summary>
+        /// Creates the render pipeline state for the current render program, sets it on the
+        /// encoder and applies the current blend colour.
+        /// </summary>
+        private readonly void SetRenderPipelineState(MTLRenderCommandEncoder renderCommandEncoder)
+        {
+            EncoderState state = _currentState;
+
+            renderCommandEncoder.SetRenderPipelineState(state.Pipeline.CreateRenderPipeline(_device, state.RenderProgram));
+
+            var color = state.BlendColor;
+
+            renderCommandEncoder.SetBlendColor(color.Red, color.Green, color.Blue, color.Alpha);
+        }
+
+        /// <summary>
+        /// Creates the compute pipeline state for the current compute program and sets it on
+        /// the encoder. Does nothing when no compute program is set.
+        /// </summary>
+        private readonly void SetComputePipelineState(MTLComputeCommandEncoder computeCommandEncoder)
+        {
+            Program program = _currentState.ComputeProgram;
+
+            if (program != null)
+            {
+                computeCommandEncoder.SetComputePipelineState(PipelineState.CreateComputePipeline(_device, program));
+            }
+        }
+
+        /// <summary>
+        /// Stores the index buffer described by <paramref name="buffer"/> and <paramref name="type"/>,
+        /// or the null index buffer state when the handle is null.
+        /// </summary>
+        public readonly void UpdateIndexBuffer(BufferRange buffer, IndexType type)
+        {
+            _currentState.IndexBuffer = buffer.Handle != BufferHandle.Null
+                ? new IndexBufferState(buffer.Handle, buffer.Offset, buffer.Size, type)
+                : IndexBufferState.Null;
+        }
+
+        /// <summary>Stores the primitive topology on the current state.</summary>
+        public readonly void UpdatePrimitiveTopology(PrimitiveTopology topology) => _currentState.Topology = topology;
+
+        /// <summary>
+        /// Installs <paramref name="program"/> as the render program (when it has a vertex function)
+        /// or as the compute program (when it has a compute function) and marks the matching
+        /// pipeline and argument buffers dirty. Logs an error and changes nothing when it has neither.
+        /// </summary>
+        public readonly void UpdateProgram(IProgram program)
+        {
+            Program prg = (Program)program;
+
+            bool hasVertex = prg.VertexFunction != IntPtr.Zero;
+
+            if (!hasVertex && prg.ComputeFunction == IntPtr.Zero)
+            {
+                // A fragment function without a vertex function points at a broken render program;
+                // otherwise the program is reported as a compute program without a function.
+                string message = prg.FragmentFunction == IntPtr.Zero ? "No compute function" : "No vertex function";
+
+                Logger.Error?.PrintMsg(LogClass.Gpu, message);
+
+                return;
+            }
+
+            if (hasVertex)
+            {
+                _currentState.RenderProgram = prg;
+
+                SignalDirty(DirtyFlags.RenderPipeline | DirtyFlags.ArgBuffers);
+
+                return;
+            }
+
+            _currentState.ComputeProgram = prg;
+
+            SignalDirty(DirtyFlags.ComputePipeline | DirtyFlags.ArgBuffers);
+        }
+
+        /// <summary>Stores the rasterizer-discard setting in the pipeline description and marks the render pipeline dirty.</summary>
+        public readonly void UpdateRasterizerDiscard(bool discard)
+        {
+            SignalDirty(DirtyFlags.RenderPipeline);
+
+            _currentState.Pipeline.RasterizerDiscardEnable = discard;
+        }
+
+        /// <summary>
+        /// Binds new colour and depth-stencil attachments. The "framebuffer uses colour write mask"
+        /// marker is reset first so the internal update re-evaluates it for the new attachments.
+        /// </summary>
+        public readonly void UpdateRenderTargets(ITexture[] colors, ITexture depthStencil)
+        {
+            EncoderState state = _currentState;
+            state.FramebufferUsingColorWriteMask = false;
+
+            UpdateRenderTargetsInternal(colors, depthStencil);
+        }
+
+        /// <summary>
+        /// Converts each component mask (bit 0 = red, 1 = green, 2 = blue, 3 = alpha) into a Metal
+        /// colour write mask and stores it in the matching blend state. The blend state is stashed
+        /// while the mask is zero and brought back once it becomes non-zero again.
+        /// </summary>
+        /// <param name="componentMask">One component mask per colour attachment.</param>
+        public readonly void UpdateRenderTargetColorMasks(ReadOnlySpan<uint> componentMask)
+        {
+            EncoderState state = _currentState;
+            ref var blendStates = ref state.Pipeline.Internal.ColorBlendState;
+
+            for (int slot = 0; slot < componentMask.Length; slot++)
+            {
+                uint bits = componentMask[slot];
+
+                var writeMask = MTLColorWriteMask.None;
+
+                if ((bits & 0x1) != 0)
+                {
+                    writeMask |= MTLColorWriteMask.Red;
+                }
+
+                if ((bits & 0x2) != 0)
+                {
+                    writeMask |= MTLColorWriteMask.Green;
+                }
+
+                if ((bits & 0x4) != 0)
+                {
+                    writeMask |= MTLColorWriteMask.Blue;
+                }
+
+                if ((bits & 0x8) != 0)
+                {
+                    writeMask |= MTLColorWriteMask.Alpha;
+                }
+
+                ref ColorBlendStateUid slotBlend = ref blendStates[slot];
+
+                // When color write mask is 0, remove all blend state to help the pipeline cache.
+                // Restore it when the mask becomes non-zero.
+                bool maskChanged = slotBlend.WriteMask != writeMask;
+
+                if (maskChanged && writeMask == 0)
+                {
+                    state.StoredBlend[slot] = slotBlend;
+
+                    slotBlend.Swap(new ColorBlendStateUid());
+                }
+                else if (maskChanged && slotBlend.WriteMask == 0)
+                {
+                    slotBlend.Swap(state.StoredBlend[slot]);
+                }
+
+                slotBlend.WriteMask = writeMask;
+            }
+
+            if (state.FramebufferUsingColorWriteMask)
+            {
+                UpdateRenderTargetsInternal(state.PreMaskRenderTargets, state.PreMaskDepthStencil);
+
+                return;
+            }
+
+            // Requires recreating pipeline
+            if (_pipeline.CurrentEncoderType == EncoderType.Render)
+            {
+                _pipeline.EndCurrentPass();
+            }
+        }
+
+        private readonly void UpdateRenderTargetsInternal(ITexture[] colors, ITexture depthStencil) // Removes duplicate colour attachments, then stores the attachments and their pixel formats; note that `colors` is modified in place.
+        {
+            // TBDR GPUs don't work properly if the same attachment is bound to multiple targets,
+            // due to each attachment being a copy of the real attachment, rather than a direct write.
+            //
+            // Just try to remove duplicate attachments.
+            // Save a copy of the array to rebind when mask changes.
+
+            // Look for textures that are masked out.
+
+            ref PipelineState pipeline = ref _currentState.Pipeline;
+            ref var blendState = ref pipeline.Internal.ColorBlendState;
+
+            pipeline.ColorBlendAttachmentStateCount = (uint)colors.Length;
+
+            for (int i = 0; i < colors.Length; i++)
+            {
+                if (colors[i] == null)
+                {
+                    continue;
+                }
+
+                var mtlMask = blendState[i].WriteMask;
+
+                for (int j = 0; j < i; j++)
+                {
+                    // Check each binding for a duplicate binding before it.
+
+                    if (colors[i] == colors[j])
+                    {
+                        // Prefer the binding with no write mask.
+
+                        var mtlMask2 = blendState[j].WriteMask;
+
+                        if (mtlMask == 0) // The later duplicate writes nothing: drop it.
+                        {
+                            colors[i] = null;
+                            MaskOut(colors, depthStencil);
+                        }
+                        else if (mtlMask2 == 0) // The earlier duplicate writes nothing: drop that one instead.
+                        {
+                            colors[j] = null;
+                            MaskOut(colors, depthStencil);
+                        } // When both duplicates have a non-zero mask, both stay bound.
+                    }
+                }
+            }
+
+            _currentState.RenderTargets = new Texture[Constants.MaxColorAttachments]; // Fresh array, so an array reference taken earlier is not mutated.
+
+            for (int i = 0; i < colors.Length; i++)
+            {
+                if (colors[i] is not Texture tex) // Null entries, or entries that are not a Texture, leave the slot empty.
+                {
+                    blendState[i].PixelFormat = MTLPixelFormat.Invalid;
+
+                    continue;
+                }
+
+                blendState[i].PixelFormat = tex.GetHandle().PixelFormat; // TODO: cache this
+                _currentState.RenderTargets[i] = tex;
+            }
+
+            if (depthStencil is Texture depthTexture)
+            {
+                pipeline.DepthStencilFormat = depthTexture.GetHandle().PixelFormat; // TODO: cache this
+                _currentState.DepthStencil = depthTexture;
+            }
+            else if (depthStencil == null) // A non-null value that is not a Texture leaves the previous depth-stencil untouched.
+            {
+                pipeline.DepthStencilFormat = MTLPixelFormat.Invalid;
+                _currentState.DepthStencil = null;
+            }
+
+            // Requires recreating pipeline
+            if (_pipeline.CurrentEncoderType == EncoderType.Render)
+            {
+                _pipeline.EndCurrentPass();
+            }
+        }
+
+        /// <summary>
+        /// Records the attachments so they can be rebound when the colour write mask changes,
+        /// and flags the framebuffer as depending on the mask. Only the first call after the
+        /// flag was cleared stores the attachments.
+        /// </summary>
+        private readonly void MaskOut(ITexture[] colors, ITexture depthStencil)
+        {
+            EncoderState state = _currentState;
+
+            if (state.FramebufferUsingColorWriteMask)
+            {
+                // Already recorded; the flag stays set.
+                return;
+            }
+
+            state.PreMaskRenderTargets = colors;
+            state.PreMaskDepthStencil = depthStencil;
+
+            // If true, then the framebuffer must be recreated when the mask changes.
+            state.FramebufferUsingColorWriteMask = true;
+        }
+
+        /// <summary>
+        /// Copies the vertex attribute descriptors into the current state, refreshes the
+        /// pipeline's vertex description and marks the render pipeline dirty.
+        /// </summary>
+        public readonly void UpdateVertexAttribs(ReadOnlySpan<VertexAttribDescriptor> vertexAttribs)
+        {
+            EncoderState state = _currentState;
+
+            vertexAttribs.CopyTo(state.VertexAttribs);
+
+            // Update the buffers on the pipeline
+            UpdatePipelineVertexState(state.VertexBuffers, state.VertexAttribs);
+
+            SignalDirty(DirtyFlags.RenderPipeline);
+        }
+
+        /// <summary>
+        /// Writes the blend descriptor into the colour blend state at <paramref name="index"/>,
+        /// stores the blend constant and marks the render pipeline dirty. When the attachment's
+        /// write mask is zero, the new blend state is stashed and the live one is reset.
+        /// </summary>
+        public readonly void UpdateBlendDescriptors(int index, BlendDescriptor blend)
+        {
+            EncoderState state = _currentState;
+            ref var target = ref state.Pipeline.Internal.ColorBlendState[index];
+
+            target.Enable = blend.Enable;
+            target.AlphaBlendOperation = blend.AlphaOp.Convert();
+            target.RgbBlendOperation = blend.ColorOp.Convert();
+            target.SourceAlphaBlendFactor = blend.AlphaSrcFactor.Convert();
+            target.DestinationAlphaBlendFactor = blend.AlphaDstFactor.Convert();
+            target.SourceRGBBlendFactor = blend.ColorSrcFactor.Convert();
+            target.DestinationRGBBlendFactor = blend.ColorDstFactor.Convert();
+
+            state.BlendColor = blend.BlendConstant;
+
+            bool writesDisabled = target.WriteMask == 0;
+
+            if (writesDisabled)
+            {
+                state.StoredBlend[index] = target;
+
+                target.Swap(new ColorBlendStateUid());
+            }
+
+            SignalDirty(DirtyFlags.RenderPipeline);
+        }
+
+        // Rebuilds the front/back stencil descriptions from the descriptor, forwards the
+        // reference values and flags the depth-stencil state dirty.
+        public void UpdateStencilState(StencilTestDescriptor stencilTest)
+        {
+            var front = new StencilUid
+            {
+                StencilFailureOperation = stencilTest.FrontSFail.Convert(),
+                DepthFailureOperation = stencilTest.FrontDpFail.Convert(),
+                DepthStencilPassOperation = stencilTest.FrontDpPass.Convert(),
+                StencilCompareFunction = stencilTest.FrontFunc.Convert(),
+                ReadMask = (uint)stencilTest.FrontFuncMask,
+                WriteMask = (uint)stencilTest.FrontMask
+            };
+
+            var back = new StencilUid
+            {
+                StencilFailureOperation = stencilTest.BackSFail.Convert(),
+                DepthFailureOperation = stencilTest.BackDpFail.Convert(),
+                DepthStencilPassOperation = stencilTest.BackDpPass.Convert(),
+                StencilCompareFunction = stencilTest.BackFunc.Convert(),
+                ReadMask = (uint)stencilTest.BackFuncMask,
+                WriteMask = (uint)stencilTest.BackMask
+            };
+
+            ref DepthStencilUid depthStencil = ref _currentState.DepthStencilUid;
+
+            depthStencil.FrontFace = front;
+            depthStencil.BackFace = back;
+            depthStencil.StencilTestEnabled = stencilTest.TestEnable;
+
+            // Reference values are tracked separately from the depth-stencil uid.
+            UpdateStencilRefValue(stencilTest.FrontFuncRef, stencilTest.BackFuncRef);
+
+            SignalDirty(DirtyFlags.DepthStencil);
+        }
+
+        // Stores the depth compare function and depth-write switch, then flags the
+        // depth-stencil state dirty. With the test disabled the compare function is
+        // Always and depth writes are off.
+        public readonly void UpdateDepthState(DepthTestDescriptor depthTest)
+        {
+            ref DepthStencilUid depthStencil = ref _currentState.DepthStencilUid;
+
+            if (depthTest.TestEnable)
+            {
+                depthStencil.DepthCompareFunction = depthTest.Func.Convert();
+                depthStencil.DepthWriteEnabled = depthTest.WriteEnable;
+            }
+            else
+            {
+                depthStencil.DepthCompareFunction = MTLCompareFunction.Always;
+                depthStencil.DepthWriteEnabled = false;
+            }
+
+            SignalDirty(DirtyFlags.DepthStencil);
+        }
+
+        // Selects the depth clip mode. It is applied straight away when a render encoder
+        // is available; otherwise the DepthClamp flag is raised.
+        public readonly void UpdateDepthClamp(bool clamp)
+        {
+            MTLDepthClipMode mode = MTLDepthClipMode.Clip;
+
+            if (clamp)
+            {
+                mode = MTLDepthClipMode.Clamp;
+            }
+
+            _currentState.DepthClipMode = mode;
+
+            if (!_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder encoder))
+            {
+                // No render encoder to update inline.
+                SignalDirty(DirtyFlags.DepthClamp);
+                return;
+            }
+
+            SetDepthClamp(encoder);
+        }
+
+        // Stores the depth bias parameters. They are applied straight away when a render
+        // encoder is available; otherwise the DepthBias flag is raised.
+        public readonly void UpdateDepthBias(float depthBias, float slopeScale, float clamp)
+        {
+            var state = _currentState;
+
+            state.DepthBias = depthBias;
+            state.SlopeScale = slopeScale;
+            state.Clamp = clamp;
+
+            if (!_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder encoder))
+            {
+                // No render encoder to update inline.
+                SignalDirty(DirtyFlags.DepthBias);
+                return;
+            }
+
+            SetDepthBias(encoder);
+        }
+
+        // Stores the logic-op enable switch and converted operation on the pipeline state,
+        // then flags the render pipeline dirty.
+        public readonly void UpdateLogicOpState(bool enable, LogicalOp op)
+        {
+            ref PipelineState pipeline = ref _currentState.Pipeline;
+
+            pipeline.LogicOpEnable = enable;
+            pipeline.LogicOp = op.Convert();
+
+            SignalDirty(DirtyFlags.RenderPipeline);
+        }
+
+        // Copies the alpha-to-coverage and alpha-to-one switches onto the pipeline state,
+        // then flags the render pipeline dirty.
+        public readonly void UpdateMultisampleState(MultisampleDescriptor multisample)
+        {
+            ref PipelineState pipeline = ref _currentState.Pipeline;
+
+            pipeline.AlphaToCoverageEnable = multisample.AlphaToCoverageEnable;
+            pipeline.AlphaToOneEnable = multisample.AlphaToOneEnable;
+
+            SignalDirty(DirtyFlags.RenderPipeline);
+        }
+
+        // Converts each region into an MTLScissorRect stored at the same index. The rects
+        // are applied straight away when a render encoder is available; otherwise the
+        // Scissors flag is raised.
+        public void UpdateScissors(ReadOnlySpan<Rectangle<int>> regions)
+        {
+            int index = 0;
+
+            foreach (var rect in regions)
+            {
+                _currentState.Scissors[index] = new MTLScissorRect
+                {
+                    height = (ulong)rect.Height,
+                    width = (ulong)rect.Width,
+                    x = (ulong)rect.X,
+                    y = (ulong)rect.Y
+                };
+
+                index++;
+            }
+
+            if (!_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder encoder))
+            {
+                // No render encoder to update inline.
+                SignalDirty(DirtyFlags.Scissors);
+                return;
+            }
+
+            SetScissors(encoder);
+        }
+
+        // Converts each viewport into an MTLViewport stored at the same index, with the
+        // depth range clamped to [0, 1]. The viewports are applied straight away when a
+        // render encoder is available; otherwise the Viewports flag is raised.
+        public void UpdateViewports(ReadOnlySpan<Viewport> viewports)
+        {
+            int index = 0;
+
+            foreach (var viewport in viewports)
+            {
+                // Y coordinate is inverted: the origin moves to the far edge and the height is negated.
+                _currentState.Viewports[index] = new MTLViewport
+                {
+                    originX = viewport.Region.X,
+                    originY = viewport.Region.Y + viewport.Region.Height,
+                    width = viewport.Region.Width,
+                    height = -viewport.Region.Height,
+                    znear = Math.Clamp(viewport.DepthNear, 0f, 1f),
+                    zfar = Math.Clamp(viewport.DepthFar, 0f, 1f)
+                };
+
+                index++;
+            }
+
+            if (!_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder encoder))
+            {
+                // No render encoder to update inline.
+                SignalDirty(DirtyFlags.Viewports);
+                return;
+            }
+
+            SetViewports(encoder);
+        }
+
+        // Fills every vertex buffer slot up to Constants.MaxVertexBuffers: slots covered by
+        // the span take its descriptor, the rest are reset to VertexBufferState.Null. Then
+        // rebuilds the pipeline vertex description and flags the render pipeline dirty.
+        public readonly void UpdateVertexBuffers(ReadOnlySpan<VertexBufferDescriptor> vertexBuffers)
+        {
+            for (int slot = 0; slot < Constants.MaxVertexBuffers; slot++)
+            {
+                if (slot >= vertexBuffers.Length)
+                {
+                    _currentState.VertexBuffers[slot] = VertexBufferState.Null;
+                    continue;
+                }
+
+                var descriptor = vertexBuffers[slot];
+                var range = descriptor.Buffer;
+
+                _currentState.VertexBuffers[slot] = new VertexBufferState(
+                    range.Handle,
+                    range.Offset,
+                    range.Size,
+                    descriptor.Divisor,
+                    descriptor.Stride);
+            }
+
+            // The pipeline description depends on both the buffers and the attributes.
+            UpdatePipelineVertexState(_currentState.VertexBuffers, _currentState.VertexAttribs);
+
+            SignalDirty(DirtyFlags.RenderPipeline);
+        }
+
+        // Stores a BufferRef for each assignment at its binding index, then flags uniforms dirty.
+        // A null handle is stored as a BufferRef with a null buffer.
+        public readonly void UpdateUniformBuffers(ReadOnlySpan<BufferAssignment> buffers)
+        {
+            for (int i = 0; i < buffers.Length; i++)
+            {
+                BufferAssignment assignment = buffers[i];
+                var range = assignment.Range;
+
+                Auto<DisposableBuffer> resolved = null;
+
+                if (range.Handle != BufferHandle.Null)
+                {
+                    resolved = _bufferManager.GetBuffer(range.Handle, range.Write);
+                }
+
+                _currentState.UniformBufferRefs[assignment.Binding] = new BufferRef(resolved, ref range);
+            }
+
+            SignalDirty(DirtyFlags.Uniforms);
+        }
+
+        // Stores a BufferRef for each assignment at its binding index, then flags storages dirty.
+        // A null handle is stored as a BufferRef with a null buffer.
+        public readonly void UpdateStorageBuffers(ReadOnlySpan<BufferAssignment> buffers)
+        {
+            for (int i = 0; i < buffers.Length; i++)
+            {
+                BufferAssignment assignment = buffers[i];
+                var range = assignment.Range;
+
+                Auto<DisposableBuffer> resolved = null;
+
+                if (range.Handle != BufferHandle.Null)
+                {
+                    resolved = _bufferManager.GetBuffer(range.Handle, range.Write);
+                }
+
+                _currentState.StorageBufferRefs[assignment.Binding] = new BufferRef(resolved, ref range);
+            }
+
+            SignalDirty(DirtyFlags.Storages);
+        }
+
+        // Stores the given buffers in consecutive storage slots starting at `first`
+        // (no explicit range), then flags storages dirty.
+        public readonly void UpdateStorageBuffers(int first, ReadOnlySpan<Auto<DisposableBuffer>> buffers)
+        {
+            int slot = first;
+
+            foreach (var autoBuffer in buffers)
+            {
+                _currentState.StorageBufferRefs[slot] = new BufferRef(autoBuffer);
+                slot++;
+            }
+
+            SignalDirty(DirtyFlags.Storages);
+        }
+
+        // Stores the cull mode and whether both faces are culled. With a render encoder
+        // available, cull mode and scissors are applied straight away; otherwise CullMode
+        // is flagged dirty, plus Scissors when the cull-both state changed.
+        public void UpdateCullMode(bool enable, Face face)
+        {
+            bool cullBoth = face == Face.FrontAndBack;
+            bool cullBothChanged = cullBoth != _currentState.CullBoth;
+
+            if (enable)
+            {
+                _currentState.CullMode = face.Convert();
+            }
+            else
+            {
+                _currentState.CullMode = MTLCullMode.None;
+            }
+
+            _currentState.CullBoth = cullBoth;
+
+            if (_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder encoder))
+            {
+                // Inline update
+                SetCullMode(encoder);
+                SetScissors(encoder);
+                return;
+            }
+
+            SignalDirty(DirtyFlags.CullMode);
+
+            if (cullBothChanged)
+            {
+                SignalDirty(DirtyFlags.Scissors);
+            }
+        }
+
+        // Stores the converted winding order. It is applied straight away when a render
+        // encoder is available; otherwise the FrontFace flag is raised.
+        public readonly void UpdateFrontFace(FrontFace frontFace)
+        {
+            _currentState.Winding = frontFace.Convert();
+
+            if (!_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder encoder))
+            {
+                // No render encoder to update inline.
+                SignalDirty(DirtyFlags.FrontFace);
+                return;
+            }
+
+            SetFrontFace(encoder);
+        }
+
+        // Stores the front/back stencil reference values. They are applied straight away
+        // when a render encoder is available; otherwise the StencilRef flag is raised.
+        private readonly void UpdateStencilRefValue(int frontRef, int backRef)
+        {
+            _currentState.FrontRefValue = frontRef;
+            _currentState.BackRefValue = backRef;
+
+            // Inline update
+            if (_pipeline.Encoders.TryGetRenderEncoder(out MTLRenderCommandEncoder renderCommandEncoder))
+            {
+                SetStencilRefValue(renderCommandEncoder);
+
+                // The values are already on the live encoder, so skip the dirty flag,
+                // as the other inline-updating setters in this type do.
+                return;
+            }
+
+            SignalDirty(DirtyFlags.StencilRef);
+        }
+
+        // Stores a texture reference (with the holder's sampler, if any) at the binding,
+        // or clears the slot when no texture is given. Flags textures dirty either way.
+        public readonly void UpdateTextureAndSampler(ShaderStage stage, int binding, TextureBase texture, SamplerHolder samplerHolder)
+        {
+            ref TextureRef slot = ref _currentState.TextureRefs[binding];
+
+            slot = texture != null
+                ? new TextureRef(stage, texture, samplerHolder?.GetSampler())
+                : default;
+
+            SignalDirty(DirtyFlags.Textures);
+        }
+
+        // Stores an image reference at the binding when the image is a Texture;
+        // any other value (including null) clears the slot. Flags images dirty either way.
+        public readonly void UpdateImage(ShaderStage stage, int binding, TextureBase image)
+        {
+            ref ImageRef slot = ref _currentState.ImageRefs[binding];
+
+            slot = image is Texture view
+                ? new ImageRef(stage, view)
+                : default;
+
+            SignalDirty(DirtyFlags.Images);
+        }
+
+        // Binds a texture array at the binding, growing the backing array by
+        // ArrayGrowthSize when needed. Nothing is flagged when the slot already matches.
+        public readonly void UpdateTextureArray(ShaderStage stage, int binding, TextureArray array)
+        {
+            ref var slot = ref GetArrayRef(ref _currentState.TextureArrayRefs, binding, ArrayGrowthSize);
+
+            if (slot.Stage == stage && slot.Array == array)
+            {
+                // Same stage and array as before.
+                return;
+            }
+
+            slot = new EncoderState.ArrayRef<TextureArray>(stage, array);
+
+            SignalDirty(DirtyFlags.Textures);
+        }
+
+        // Binds a texture array in the extra-set list, indexed by setIndex minus
+        // MetalRenderer.TotalSets. Nothing is flagged when the slot already matches.
+        public readonly void UpdateTextureArraySeparate(ShaderStage stage, int setIndex, TextureArray array)
+        {
+            int extraIndex = setIndex - MetalRenderer.TotalSets;
+
+            ref var slot = ref GetArrayRef(ref _currentState.TextureArrayExtraRefs, extraIndex);
+
+            if (slot.Stage == stage && slot.Array == array)
+            {
+                // Same stage and array as before.
+                return;
+            }
+
+            slot = new EncoderState.ArrayRef<TextureArray>(stage, array);
+
+            SignalDirty(DirtyFlags.Textures);
+        }
+
+        // Binds an image array at the binding, growing the backing array by
+        // ArrayGrowthSize when needed. Nothing is flagged when the slot already matches.
+        public readonly void UpdateImageArray(ShaderStage stage, int binding, ImageArray array)
+        {
+            ref var slot = ref GetArrayRef(ref _currentState.ImageArrayRefs, binding, ArrayGrowthSize);
+
+            if (slot.Stage == stage && slot.Array == array)
+            {
+                // Same stage and array as before.
+                return;
+            }
+
+            slot = new EncoderState.ArrayRef<ImageArray>(stage, array);
+
+            SignalDirty(DirtyFlags.Images);
+        }
+
+        // Binds an image array in the extra-set list, indexed by setIndex minus
+        // MetalRenderer.TotalSets. Nothing is flagged when the slot already matches.
+        public readonly void UpdateImageArraySeparate(ShaderStage stage, int setIndex, ImageArray array)
+        {
+            int extraIndex = setIndex - MetalRenderer.TotalSets;
+
+            ref var slot = ref GetArrayRef(ref _currentState.ImageArrayExtraRefs, extraIndex);
+
+            if (slot.Stage == stage && slot.Array == array)
+            {
+                // Same stage and array as before.
+                return;
+            }
+
+            slot = new EncoderState.ArrayRef<ImageArray>(stage, array);
+
+            SignalDirty(DirtyFlags.Images);
+        }
+
+        // Returns a reference to array[index], first resizing the array to
+        // index + growthSize when the index is past its end. Throws for a negative index.
+        private static ref EncoderState.ArrayRef<T> GetArrayRef<T>(ref EncoderState.ArrayRef<T>[] array, int index, int growthSize = 1)
+        {
+            ArgumentOutOfRangeException.ThrowIfNegative(index);
+
+            bool withinBounds = index < array.Length;
+
+            if (!withinBounds)
+            {
+                int grownLength = index + growthSize;
+
+                Array.Resize(ref array, grownLength);
+            }
+
+            return ref array[index];
+        }
+
+        // Applies the cached depth-stencil state for the current uid when a depth-stencil
+        // attachment is present, and the default state otherwise.
+        private readonly void SetDepthStencilState(MTLRenderCommandEncoder renderCommandEncoder)
+        {
+            if (DepthStencil == null)
+            {
+                renderCommandEncoder.SetDepthStencilState(_defaultState);
+                return;
+            }
+
+            MTLDepthStencilState cached = _depthStencilCache.GetOrCreate(_currentState.DepthStencilUid);
+
+            renderCommandEncoder.SetDepthStencilState(cached);
+        }
+
+        // Pushes the stored depth clip mode to the encoder.
+        private readonly void SetDepthClamp(MTLRenderCommandEncoder renderCommandEncoder)
+            => renderCommandEncoder.SetDepthClipMode(_currentState.DepthClipMode);
+
+        // Pushes the stored depth bias, slope scale and clamp to the encoder.
+        private readonly void SetDepthBias(MTLRenderCommandEncoder renderCommandEncoder)
+        {
+            var state = _currentState;
+
+            renderCommandEncoder.SetDepthBias(state.DepthBias, state.SlopeScale, state.Clamp);
+        }
+
+        // Applies the scissor rects to the encoder. When both faces are culled and the
+        // topology is Triangles or TriangleStrip, a zero-sized rect is set instead.
+        private unsafe void SetScissors(MTLRenderCommandEncoder renderCommandEncoder)
+        {
+            bool drawsTriangles = _currentState.Topology is PrimitiveTopology.Triangles or PrimitiveTopology.TriangleStrip;
+
+            if (_currentState.CullBoth && drawsTriangles)
+            {
+                var emptyRect = new MTLScissorRect { x = 0, y = 0, width = 0, height = 0 };
+
+                renderCommandEncoder.SetScissorRect(emptyRect);
+                return;
+            }
+
+            if (_currentState.Scissors.Length == 0)
+            {
+                // Nothing to apply.
+                return;
+            }
+
+            fixed (MTLScissorRect* pMtlScissors = _currentState.Scissors)
+            {
+                renderCommandEncoder.SetScissorRects((IntPtr)pMtlScissors, (ulong)_currentState.Scissors.Length);
+            }
+        }
+
+        // Applies the stored viewports to the encoder; does nothing when there are none.
+        private readonly unsafe void SetViewports(MTLRenderCommandEncoder renderCommandEncoder)
+        {
+            if (_currentState.Viewports.Length == 0)
+            {
+                return;
+            }
+
+            fixed (MTLViewport* pMtlViewports = _currentState.Viewports)
+            {
+                renderCommandEncoder.SetViewports((IntPtr)pMtlViewports, (ulong)_currentState.Viewports.Length);
+            }
+        }
+
+        // Rebuilds the pipeline's vertex attribute and binding descriptions.
+        // Attributes flagged IsZero are redirected to Constants.ZeroBufferIndex at offset 0.
+        // Only bindings referenced by at least one attribute keep a layout; the rest are reset.
+        private readonly void UpdatePipelineVertexState(VertexBufferState[] bufferDescriptors, VertexAttribDescriptor[] attribDescriptors)
+        {
+            ref PipelineState pipeline = ref _currentState.Pipeline;
+
+            // One bit per buffer index that some attribute reads from.
+            uint usedBuffers = 0;
+            uint zeroBufferBit = 1u << (int)Constants.ZeroBufferIndex;
+
+            for (int i = 0; i < attribDescriptors.Length; i++)
+            {
+                ref var attrib = ref pipeline.Internal.VertexAttributes[i];
+
+                // The format is converted the same way for both kinds of attribute.
+                attrib.Format = attribDescriptors[i].Format.Convert();
+
+                if (attribDescriptors[i].IsZero)
+                {
+                    usedBuffers |= zeroBufferBit;
+                    attrib.BufferIndex = Constants.ZeroBufferIndex;
+                    attrib.Offset = 0;
+                    continue;
+                }
+
+                usedBuffers |= 1u << attribDescriptors[i].BufferIndex;
+                attrib.BufferIndex = (ulong)attribDescriptors[i].BufferIndex;
+                attrib.Offset = (ulong)attribDescriptors[i].Offset;
+            }
+
+            for (int i = 0; i < bufferDescriptors.Length; i++)
+            {
+                ref var layout = ref pipeline.Internal.VertexBindings[i];
+
+                bool referenced = (usedBuffers & (1u << i)) != 0;
+
+                if (!referenced)
+                {
+                    layout = new();
+                    continue;
+                }
+
+                layout.Stride = (uint)bufferDescriptors[i].Stride;
+
+                if (layout.Stride == 0)
+                {
+                    // Zero stride: stride 1 with a constant step function.
+                    layout.Stride = 1;
+                    layout.StepFunction = MTLVertexStepFunction.Constant;
+                    layout.StepRate = 0;
+                    continue;
+                }
+
+                if (bufferDescriptors[i].Divisor > 0)
+                {
+                    layout.StepFunction = MTLVertexStepFunction.PerInstance;
+                    layout.StepRate = (uint)bufferDescriptors[i].Divisor;
+                }
+                else
+                {
+                    layout.StepFunction = MTLVertexStepFunction.PerVertex;
+                    layout.StepRate = 1;
+                }
+            }
+
+            // Zero buffer
+            ref var zeroBufLayout = ref pipeline.Internal.VertexBindings[(int)Constants.ZeroBufferIndex];
+
+            if ((usedBuffers & zeroBufferBit) == 0)
+            {
+                zeroBufLayout = new();
+            }
+            else
+            {
+                zeroBufLayout.Stride = 1;
+                zeroBufLayout.StepFunction = MTLVertexStepFunction.Constant;
+                zeroBufLayout.StepRate = 0;
+            }
+
+            pipeline.VertexAttributeDescriptionsCount = (uint)attribDescriptors.Length;
+            pipeline.VertexBindingDescriptionsCount = Constants.ZeroBufferIndex + 1; // TODO: move this out?
+        }
+
+        // Queues every vertex buffer that resolves to a non-null native buffer at its own
+        // index, then queues the zero buffer at Constants.ZeroBufferIndex when one exists.
+        private readonly void SetVertexBuffers(VertexBufferState[] bufferStates, ref readonly RenderEncoderBindings bindings)
+        {
+            for (int slot = 0; slot < bufferStates.Length; slot++)
+            {
+                (MTLBuffer resolved, int byteOffset) = bufferStates[slot].GetVertexBuffer(_bufferManager, _pipeline.Cbs);
+
+                if (resolved.NativePtr == IntPtr.Zero)
+                {
+                    // Nothing bound in this slot.
+                    continue;
+                }
+
+                bindings.VertexBuffers.Add(new BufferResource(resolved, (ulong)byteOffset, (ulong)slot));
+            }
+
+            if (_zeroBuffer == BufferHandle.Null)
+            {
+                return;
+            }
+
+            Auto<DisposableBuffer> autoZeroBuffer = _bufferManager.GetBuffer(_zeroBuffer, false);
+
+            if (autoZeroBuffer == null)
+            {
+                return;
+            }
+
+            var zeroMtlBuffer = autoZeroBuffer.Get(_pipeline.Cbs).Value;
+
+            bindings.VertexBuffers.Add(new BufferResource(zeroMtlBuffer, 0, Constants.ZeroBufferIndex));
+        }
+
+        // Resolves a buffer reference to (GPU address, native pointer).
+        // Returns (0, IntPtr.Zero) when the reference holds no buffer. When a range is
+        // present, its offset is added to the GPU address.
+        private readonly (ulong gpuAddress, IntPtr nativePtr) AddressForBuffer(ref BufferRef buffer)
+        {
+            var autoBuffer = buffer.Buffer;
+
+            if (autoBuffer == null)
+            {
+                return (0UL, IntPtr.Zero);
+            }
+
+            var range = buffer.Range;
+            var byteOffset = 0;
+            MTLBuffer resolved;
+
+            if (range.HasValue)
+            {
+                var bounds = range.Value;
+
+                byteOffset = bounds.Offset;
+                resolved = autoBuffer.Get(_pipeline.Cbs, byteOffset, bounds.Size, bounds.Write).Value;
+            }
+            else
+            {
+                resolved = autoBuffer.Get(_pipeline.Cbs).Value;
+            }
+
+            ulong gpuAddress = resolved.GpuAddress + (ulong)byteOffset;
+
+            return (gpuAddress, resolved.NativePtr);
+        }
+
+        // Resolves a texture reference to (GPU resource id, native pointer).
+        // Returns (0, IntPtr.Zero) when the reference has no storage. Buffer textures
+        // get RebuildStorage(false) called before their handle is read.
+        private readonly (ulong gpuAddress, IntPtr nativePtr) AddressForTexture(ref TextureRef texture)
+        {
+            var storage = texture.Storage;
+
+            if (storage == null)
+            {
+                return (0UL, IntPtr.Zero);
+            }
+
+            if (storage is TextureBuffer bufferBacked)
+            {
+                bufferBacked.RebuildStorage(false);
+            }
+
+            var handle = storage.GetHandle();
+
+            return (handle.GpuResourceID._impl, handle.NativePtr);
+        }
+
+        // Resolves an image reference to (GPU resource id, native pointer).
+        // Returns (0, IntPtr.Zero) when the reference has no storage.
+        private readonly (ulong gpuAddress, IntPtr nativePtr) AddressForImage(ref ImageRef image)
+        {
+            var storage = image.Storage;
+
+            if (storage == null)
+            {
+                return (0UL, IntPtr.Zero);
+            }
+
+            var handle = storage.GetHandle();
+
+            return (handle.GpuResourceID._impl, handle.NativePtr);
+        }
+
+        // Resolves a buffer texture to (GPU resource id, native pointer).
+        // Returns (0, IntPtr.Zero) for null; otherwise RebuildStorage(false) is called
+        // before the handle is read.
+        private readonly (ulong gpuAddress, IntPtr nativePtr) AddressForTextureBuffer(ref TextureBuffer bufferTexture)
+        {
+            if (bufferTexture == null)
+            {
+                return (0UL, IntPtr.Zero);
+            }
+
+            bufferTexture.RebuildStorage(false);
+
+            var handle = bufferTexture.GetHandle();
+
+            return (handle.GpuResourceID._impl, handle.NativePtr);
+        }
+
+        // Records a resource with its usage and render stages on the render bindings.
+        // Null pointers are ignored.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static void AddResource(IntPtr resourcePointer, MTLResourceUsage usage, MTLRenderStages stages, ref readonly RenderEncoderBindings bindings)
+        {
+            if (resourcePointer == IntPtr.Zero)
+            {
+                return;
+            }
+
+            var wrapped = new MTLResource(resourcePointer);
+
+            bindings.Resources.Add(new Resource(wrapped, usage, stages));
+        }
+
+        // Records a resource with its usage on the compute bindings (stages argument is 0).
+        // Null pointers are ignored.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static void AddResource(IntPtr resourcePointer, MTLResourceUsage usage, ref readonly ComputeEncoderBindings bindings)
+        {
+            if (resourcePointer == IntPtr.Zero)
+            {
+                return;
+            }
+
+            var wrapped = new MTLResource(resourcePointer);
+
+            bindings.Resources.Add(new Resource(wrapped, usage, 0));
+        }
+
+        private readonly void UpdateAndBind(Program program, uint setIndex, ref readonly RenderEncoderBindings bindings)
+        {
+            var bindingSegments = program.BindingSegments[setIndex];
+
+            if (bindingSegments.Length == 0)
+            {
+                return;
+            }
+
+            ScopedTemporaryBuffer vertArgBuffer = default;
+            ScopedTemporaryBuffer fragArgBuffer = default;
+
+            if (program.ArgumentBufferSizes[setIndex] > 0)
+            {
+                vertArgBuffer = _bufferManager.ReserveOrCreate(_pipeline.Cbs, program.ArgumentBufferSizes[setIndex] * sizeof(ulong));
+            }
+
+            if (program.FragArgumentBufferSizes[setIndex] > 0)
+            {
+                fragArgBuffer = _bufferManager.ReserveOrCreate(_pipeline.Cbs, program.FragArgumentBufferSizes[setIndex] * sizeof(ulong));
+            }
+
+            Span<ulong> vertResourceIds = stackalloc ulong[program.ArgumentBufferSizes[setIndex]];
+            Span<ulong> fragResourceIds = stackalloc ulong[program.FragArgumentBufferSizes[setIndex]];
+
+            var vertResourceIdIndex = 0;
+            var fragResourceIdIndex = 0;
+
+            foreach (ResourceBindingSegment segment in bindingSegments)
+            {
+                int binding = segment.Binding;
+                int count = segment.Count;
+
+                switch (setIndex)
+                {
+                    case Constants.ConstantBuffersSetIndex:
+                        for (int i = 0; i < count; i++)
+                        {
+                            int index = binding + i;
+
+                            ref BufferRef buffer = ref _currentState.UniformBufferRefs[index];
+                            var (gpuAddress, nativePtr) = AddressForBuffer(ref buffer);
+
+                            MTLRenderStages renderStages = 0;
+
+                            if ((segment.Stages & ResourceStages.Vertex) != 0)
+                            {
+                                vertResourceIds[vertResourceIdIndex] = gpuAddress;
+                                vertResourceIdIndex++;
+
+                                renderStages |= MTLRenderStages.RenderStageVertex;
+                            }
+
+                            if ((segment.Stages & ResourceStages.Fragment) != 0)
+                            {
+                                fragResourceIds[fragResourceIdIndex] = gpuAddress;
+                                fragResourceIdIndex++;
+
+                                renderStages |= MTLRenderStages.RenderStageFragment;
+                            }
+
+                            AddResource(nativePtr, MTLResourceUsage.Read, renderStages, in bindings);
+                        }
+                        break;
+                    case Constants.StorageBuffersSetIndex:
+                        for (int i = 0; i < count; i++)
+                        {
+                            int index = binding + i;
+
+                            ref BufferRef buffer = ref _currentState.StorageBufferRefs[index];
+                            var (gpuAddress, nativePtr) = AddressForBuffer(ref buffer);
+
+                            MTLRenderStages renderStages = 0;
+
+                            if ((segment.Stages & ResourceStages.Vertex) != 0)
+                            {
+                                vertResourceIds[vertResourceIdIndex] = gpuAddress;
+                                vertResourceIdIndex++;
+
+                                renderStages |= MTLRenderStages.RenderStageVertex;
+                            }
+
+                            if ((segment.Stages & ResourceStages.Fragment) != 0)
+                            {
+                                fragResourceIds[fragResourceIdIndex] = gpuAddress;
+                                fragResourceIdIndex++;
+
+                                renderStages |= MTLRenderStages.RenderStageFragment;
+                            }
+
+                            AddResource(nativePtr, MTLResourceUsage.Read, renderStages, in bindings);
+                        }
+                        break;
+                    case Constants.TexturesSetIndex:
+                        if (!segment.IsArray)
+                        {
+                            for (int i = 0; i < count; i++)
+                            {
+                                int index = binding + i;
+
+                                ref var texture = ref _currentState.TextureRefs[index];
+                                var (gpuAddress, nativePtr) = AddressForTexture(ref texture);
+
+                                MTLRenderStages renderStages = 0;
+
+                                if ((segment.Stages & ResourceStages.Vertex) != 0)
+                                {
+                                    vertResourceIds[vertResourceIdIndex] = gpuAddress;
+                                    vertResourceIdIndex++;
+
+                                    if (texture.Sampler != null)
+                                    {
+                                        vertResourceIds[vertResourceIdIndex] = texture.Sampler.Get(_pipeline.Cbs).Value.GpuResourceID._impl;
+                                        vertResourceIdIndex++;
+                                    }
+
+                                    renderStages |= MTLRenderStages.RenderStageVertex;
+                                }
+
+                                if ((segment.Stages & ResourceStages.Fragment) != 0)
+                                {
+                                    fragResourceIds[fragResourceIdIndex] = gpuAddress;
+                                    fragResourceIdIndex++;
+
+                                    if (texture.Sampler != null)
+                                    {
+                                        fragResourceIds[fragResourceIdIndex] = texture.Sampler.Get(_pipeline.Cbs).Value.GpuResourceID._impl;
+                                        fragResourceIdIndex++;
+                                    }
+
+                                    renderStages |= MTLRenderStages.RenderStageFragment;
+                                }
+
+                                AddResource(nativePtr, MTLResourceUsage.Read, renderStages, in bindings);
+                            }
+                        }
+                        else
+                        {
+                            var textureArray = _currentState.TextureArrayRefs[binding].Array;
+
+                            if (segment.Type != ResourceType.BufferTexture)
+                            {
+                                var textures = textureArray.GetTextureRefs();
+                                var samplers = new Auto<DisposableSampler>[textures.Length];
+
+                                for (int i = 0; i < textures.Length; i++)
+                                {
+                                    TextureRef texture = textures[i];
+                                    var (gpuAddress, nativePtr) = AddressForTexture(ref texture);
+
+                                    samplers[i] = texture.Sampler;
+
+                                    MTLRenderStages renderStages = 0;
+
+                                    if ((segment.Stages & ResourceStages.Vertex) != 0)
+                                    {
+                                        vertResourceIds[vertResourceIdIndex] = gpuAddress;
+                                        vertResourceIdIndex++;
+
+                                        renderStages |= MTLRenderStages.RenderStageVertex;
+                                    }
+
+                                    if ((segment.Stages & ResourceStages.Fragment) != 0)
+                                    {
+                                        fragResourceIds[fragResourceIdIndex] = gpuAddress;
+                                        fragResourceIdIndex++;
+
+                                        renderStages |= MTLRenderStages.RenderStageFragment;
+                                    }
+
+                                    AddResource(nativePtr, MTLResourceUsage.Read, renderStages, in bindings);
+                                }
+
+                                foreach (var sampler in samplers)
+                                {
+                                    ulong gpuAddress = 0;
+
+                                    if (sampler != null)
+                                    {
+                                        gpuAddress = sampler.Get(_pipeline.Cbs).Value.GpuResourceID._impl;
+                                    }
+
+                                    if ((segment.Stages & ResourceStages.Vertex) != 0)
+                                    {
+                                        vertResourceIds[vertResourceIdIndex] = gpuAddress;
+                                        vertResourceIdIndex++;
+                                    }
+
+                                    if ((segment.Stages & ResourceStages.Fragment) != 0)
+                                    {
+                                        fragResourceIds[fragResourceIdIndex] = gpuAddress;
+                                        fragResourceIdIndex++;
+                                    }
+                                }
+                            }
+                            else
+                            {
+                                var bufferTextures = textureArray.GetBufferTextureRefs();
+
+                                for (int i = 0; i < bufferTextures.Length; i++)
+                                {
+                                    TextureBuffer bufferTexture = bufferTextures[i];
+                                    var (gpuAddress, nativePtr) = AddressForTextureBuffer(ref bufferTexture);
+
+                                    MTLRenderStages renderStages = 0;
+
+                                    if ((segment.Stages & ResourceStages.Vertex) != 0)
+                                    {
+                                        vertResourceIds[vertResourceIdIndex] = gpuAddress;
+                                        vertResourceIdIndex++;
+
+                                        renderStages |= MTLRenderStages.RenderStageVertex;
+                                    }
+
+                                    if ((segment.Stages & ResourceStages.Fragment) != 0)
+                                    {
+                                        fragResourceIds[fragResourceIdIndex] = gpuAddress;
+                                        fragResourceIdIndex++;
+
+                                        renderStages |= MTLRenderStages.RenderStageFragment;
+                                    }
+
+                                    AddResource(nativePtr, MTLResourceUsage.Read, renderStages, in bindings);
+                                }
+                            }
+                        }
+                        break;
+                    case Constants.ImagesSetIndex:
+                        if (!segment.IsArray)
+                        {
+                            for (int i = 0; i < count; i++)
+                            {
+                                int index = binding + i;
+
+                                ref var image = ref _currentState.ImageRefs[index];
+                                var (gpuAddress, nativePtr) = AddressForImage(ref image);
+
+                                MTLRenderStages renderStages = 0;
+
+                                if ((segment.Stages & ResourceStages.Vertex) != 0)
+                                {
+                                    vertResourceIds[vertResourceIdIndex] = gpuAddress;
+                                    vertResourceIdIndex++;
+                                    renderStages |= MTLRenderStages.RenderStageVertex;
+                                }
+
+                                if ((segment.Stages & ResourceStages.Fragment) != 0)
+                                {
+                                    fragResourceIds[fragResourceIdIndex] = gpuAddress;
+                                    fragResourceIdIndex++;
+                                    renderStages |= MTLRenderStages.RenderStageFragment;
+                                }
+
+                                AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, renderStages, in bindings);
+                            }
+                        }
+                        else
+                        {
+                            var imageArray = _currentState.ImageArrayRefs[binding].Array;
+
+                            if (segment.Type != ResourceType.BufferImage)
+                            {
+                                var images = imageArray.GetTextureRefs();
+
+                                for (int i = 0; i < images.Length; i++)
+                                {
+                                    TextureRef image = images[i];
+                                    var (gpuAddress, nativePtr) = AddressForTexture(ref image);
+
+                                    MTLRenderStages renderStages = 0;
+
+                                    if ((segment.Stages & ResourceStages.Vertex) != 0)
+                                    {
+                                        vertResourceIds[vertResourceIdIndex] = gpuAddress;
+                                        vertResourceIdIndex++;
+                                        renderStages |= MTLRenderStages.RenderStageVertex;
+                                    }
+
+                                    if ((segment.Stages & ResourceStages.Fragment) != 0)
+                                    {
+                                        fragResourceIds[fragResourceIdIndex] = gpuAddress;
+                                        fragResourceIdIndex++;
+                                        renderStages |= MTLRenderStages.RenderStageFragment;
+                                    }
+
+                                    AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, renderStages, in bindings);
+                                }
+                            }
+                            else
+                            {
+                                var bufferImages = imageArray.GetBufferTextureRefs();
+
+                                for (int i = 0; i < bufferImages.Length; i++)
+                                {
+                                    TextureBuffer image = bufferImages[i];
+                                    var (gpuAddress, nativePtr) = AddressForTextureBuffer(ref image);
+
+                                    MTLRenderStages renderStages = 0;
+
+                                    if ((segment.Stages & ResourceStages.Vertex) != 0)
+                                    {
+                                        vertResourceIds[vertResourceIdIndex] = gpuAddress;
+                                        vertResourceIdIndex++;
+                                        renderStages |= MTLRenderStages.RenderStageVertex;
+                                    }
+
+                                    if ((segment.Stages & ResourceStages.Fragment) != 0)
+                                    {
+                                        fragResourceIds[fragResourceIdIndex] = gpuAddress;
+                                        fragResourceIdIndex++;
+                                        renderStages |= MTLRenderStages.RenderStageFragment;
+                                    }
+
+                                    AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, renderStages, in bindings);
+                                }
+                            }
+                        }
+                        break;
+                }
+            }
+
+            if (program.ArgumentBufferSizes[setIndex] > 0)
+            {
+                vertArgBuffer.Holder.SetDataUnchecked(vertArgBuffer.Offset, MemoryMarshal.AsBytes(vertResourceIds));
+                var mtlVertArgBuffer = _bufferManager.GetBuffer(vertArgBuffer.Handle, false).Get(_pipeline.Cbs).Value;
+                bindings.VertexBuffers.Add(new BufferResource(mtlVertArgBuffer, (uint)vertArgBuffer.Range.Offset, SetIndexToBindingIndex(setIndex)));
+            }
+
+            if (program.FragArgumentBufferSizes[setIndex] > 0)
+            {
+                fragArgBuffer.Holder.SetDataUnchecked(fragArgBuffer.Offset, MemoryMarshal.AsBytes(fragResourceIds));
+                var mtlFragArgBuffer = _bufferManager.GetBuffer(fragArgBuffer.Handle, false).Get(_pipeline.Cbs).Value;
+                bindings.FragmentBuffers.Add(new BufferResource(mtlFragArgBuffer, (uint)fragArgBuffer.Range.Offset, SetIndexToBindingIndex(setIndex)));
+            }
+        }
+
+        /// <summary>
+        /// Fills the compute argument buffer for one resource set and records it, together with
+        /// every resource it references, in <paramref name="bindings"/>.
+        /// </summary>
+        /// <remarks>
+        /// Slots are written in binding-segment order, and only for segments whose stages include
+        /// <see cref="ResourceStages.Compute"/>. Nothing is done when the set has no binding segments.
+        /// </remarks>
+        /// <param name="program">Program that supplies the binding segments and argument buffer sizes</param>
+        /// <param name="setIndex">Index of the resource set to update</param>
+        /// <param name="bindings">Compute encoder bindings that receive the resources and the argument buffer</param>
+        private readonly void UpdateAndBind(Program program, uint setIndex, ref readonly ComputeEncoderBindings bindings)
+        {
+            var bindingSegments = program.BindingSegments[setIndex];
+
+            if (bindingSegments.Length == 0)
+            {
+                return;
+            }
+
+            ScopedTemporaryBuffer argBuffer = default;
+
+            // The backing buffer is only reserved when the set actually has argument slots.
+            if (program.ArgumentBufferSizes[setIndex] > 0)
+            {
+                argBuffer = _bufferManager.ReserveOrCreate(_pipeline.Cbs, program.ArgumentBufferSizes[setIndex] * sizeof(ulong));
+            }
+
+            // Staging area for the resource IDs; copied into the argument buffer once all segments are processed.
+            Span<ulong> resourceIds = stackalloc ulong[program.ArgumentBufferSizes[setIndex]];
+            var resourceIdIndex = 0;
+
+            foreach (ResourceBindingSegment segment in bindingSegments)
+            {
+                int binding = segment.Binding;
+                int count = segment.Count;
+
+                switch (setIndex)
+                {
+                    case Constants.ConstantBuffersSetIndex:
+                        for (int i = 0; i < count; i++)
+                        {
+                            int index = binding + i;
+
+                            ref BufferRef buffer = ref _currentState.UniformBufferRefs[index];
+                            var (gpuAddress, nativePtr) = AddressForBuffer(ref buffer);
+
+                            if ((segment.Stages & ResourceStages.Compute) != 0)
+                            {
+                                // Registered through AddResource only, like every other resource kind in this method.
+                                AddResource(nativePtr, MTLResourceUsage.Read, in bindings);
+                                resourceIds[resourceIdIndex] = gpuAddress;
+                                resourceIdIndex++;
+                            }
+                        }
+                        break;
+                    case Constants.StorageBuffersSetIndex:
+                        for (int i = 0; i < count; i++)
+                        {
+                            int index = binding + i;
+
+                            ref BufferRef buffer = ref _currentState.StorageBufferRefs[index];
+                            var (gpuAddress, nativePtr) = AddressForBuffer(ref buffer);
+
+                            if ((segment.Stages & ResourceStages.Compute) != 0)
+                            {
+                                AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, in bindings);
+                                resourceIds[resourceIdIndex] = gpuAddress;
+                                resourceIdIndex++;
+                            }
+                        }
+                        break;
+                    case Constants.TexturesSetIndex:
+                        if (!segment.IsArray)
+                        {
+                            for (int i = 0; i < count; i++)
+                            {
+                                int index = binding + i;
+
+                                ref var texture = ref _currentState.TextureRefs[index];
+                                var (gpuAddress, nativePtr) = AddressForTexture(ref texture);
+
+                                if ((segment.Stages & ResourceStages.Compute) != 0)
+                                {
+                                    AddResource(nativePtr, MTLResourceUsage.Read, in bindings);
+                                    resourceIds[resourceIdIndex] = gpuAddress;
+                                    resourceIdIndex++;
+
+                                    // The sampler ID follows the texture ID directly.
+                                    // NOTE(review): no slot is written when the sampler is null - confirm this matches the expected layout.
+                                    if (texture.Sampler != null)
+                                    {
+                                        resourceIds[resourceIdIndex] = texture.Sampler.Get(_pipeline.Cbs).Value.GpuResourceID._impl;
+                                        resourceIdIndex++;
+                                    }
+                                }
+                            }
+                        }
+                        else
+                        {
+                            var textureArray = _currentState.TextureArrayRefs[binding].Array;
+
+                            if (segment.Type != ResourceType.BufferTexture)
+                            {
+                                var textures = textureArray.GetTextureRefs();
+                                var samplers = new Auto<DisposableSampler>[textures.Length];
+
+                                // For arrays, all texture IDs are written first and the sampler IDs afterwards.
+                                for (int i = 0; i < textures.Length; i++)
+                                {
+                                    TextureRef texture = textures[i];
+                                    var (gpuAddress, nativePtr) = AddressForTexture(ref texture);
+
+                                    if ((segment.Stages & ResourceStages.Compute) != 0)
+                                    {
+                                        AddResource(nativePtr, MTLResourceUsage.Read, in bindings);
+                                        resourceIds[resourceIdIndex] = gpuAddress;
+                                        resourceIdIndex++;
+
+                                        samplers[i] = texture.Sampler;
+                                    }
+                                }
+
+                                // NOTE(review): null samplers are skipped without consuming a slot, whereas the
+                                // render path in this file writes 0 for them - confirm which layout is intended.
+                                foreach (var sampler in samplers)
+                                {
+                                    if (sampler != null)
+                                    {
+                                        resourceIds[resourceIdIndex] = sampler.Get(_pipeline.Cbs).Value.GpuResourceID._impl;
+                                        resourceIdIndex++;
+                                    }
+                                }
+                            }
+                            else
+                            {
+                                var bufferTextures = textureArray.GetBufferTextureRefs();
+
+                                for (int i = 0; i < bufferTextures.Length; i++)
+                                {
+                                    TextureBuffer bufferTexture = bufferTextures[i];
+                                    var (gpuAddress, nativePtr) = AddressForTextureBuffer(ref bufferTexture);
+
+                                    if ((segment.Stages & ResourceStages.Compute) != 0)
+                                    {
+                                        AddResource(nativePtr, MTLResourceUsage.Read, in bindings);
+                                        resourceIds[resourceIdIndex] = gpuAddress;
+                                        resourceIdIndex++;
+                                    }
+                                }
+                            }
+                        }
+                        break;
+                    case Constants.ImagesSetIndex:
+                        if (!segment.IsArray)
+                        {
+                            for (int i = 0; i < count; i++)
+                            {
+                                int index = binding + i;
+
+                                ref var image = ref _currentState.ImageRefs[index];
+                                var (gpuAddress, nativePtr) = AddressForImage(ref image);
+
+                                if ((segment.Stages & ResourceStages.Compute) != 0)
+                                {
+                                    AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, in bindings);
+                                    resourceIds[resourceIdIndex] = gpuAddress;
+                                    resourceIdIndex++;
+                                }
+                            }
+                        }
+                        else
+                        {
+                            var imageArray = _currentState.ImageArrayRefs[binding].Array;
+
+                            if (segment.Type != ResourceType.BufferImage)
+                            {
+                                var images = imageArray.GetTextureRefs();
+
+                                for (int i = 0; i < images.Length; i++)
+                                {
+                                    TextureRef image = images[i];
+                                    var (gpuAddress, nativePtr) = AddressForTexture(ref image);
+
+                                    if ((segment.Stages & ResourceStages.Compute) != 0)
+                                    {
+                                        AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, in bindings);
+                                        resourceIds[resourceIdIndex] = gpuAddress;
+                                        resourceIdIndex++;
+                                    }
+                                }
+                            }
+                            else
+                            {
+                                var bufferImages = imageArray.GetBufferTextureRefs();
+
+                                for (int i = 0; i < bufferImages.Length; i++)
+                                {
+                                    TextureBuffer image = bufferImages[i];
+                                    var (gpuAddress, nativePtr) = AddressForTextureBuffer(ref image);
+
+                                    if ((segment.Stages & ResourceStages.Compute) != 0)
+                                    {
+                                        AddResource(nativePtr, MTLResourceUsage.Read | MTLResourceUsage.Write, in bindings);
+                                        resourceIds[resourceIdIndex] = gpuAddress;
+                                        resourceIdIndex++;
+                                    }
+                                }
+                            }
+                        }
+                        break;
+                }
+            }
+
+            // Upload the collected IDs and bind the argument buffer at the slot that belongs to this set.
+            if (program.ArgumentBufferSizes[setIndex] > 0)
+            {
+                argBuffer.Holder.SetDataUnchecked(argBuffer.Offset, MemoryMarshal.AsBytes(resourceIds));
+                var mtlArgBuffer = _bufferManager.GetBuffer(argBuffer.Handle, false).Get(_pipeline.Cbs).Value;
+                bindings.Buffers.Add(new BufferResource(mtlArgBuffer, (uint)argBuffer.Range.Offset, SetIndexToBindingIndex(setIndex)));
+            }
+        }
+
+        /// <summary>
+        /// Maps a resource set index to the binding index used for that set's argument buffer.
+        /// </summary>
+        /// <param name="setIndex">One of the set index constants declared in <see cref="Constants"/></param>
+        /// <returns>The binding index constant that corresponds to <paramref name="setIndex"/></returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="setIndex"/> is not a known set index</exception>
+        private static uint SetIndexToBindingIndex(uint setIndex)
+        {
+            return setIndex switch
+            {
+                Constants.ConstantBuffersSetIndex => Constants.ConstantBuffersIndex,
+                Constants.StorageBuffersSetIndex => Constants.StorageBuffersIndex,
+                Constants.TexturesSetIndex => Constants.TexturesIndex,
+                Constants.ImagesSetIndex => Constants.ImagesIndex,
+                // Explicit arm keeps the switch exhaustive and names the offending value on failure.
+                _ => throw new ArgumentOutOfRangeException(nameof(setIndex), setIndex, "Unknown resource set index."),
+            };
+        }
+
+        /// <summary>
+        /// Applies the cull mode held in the current state to the given render command encoder.
+        /// </summary>
+        /// <param name="renderCommandEncoder">Encoder that receives the cull mode</param>
+        private readonly void SetCullMode(MTLRenderCommandEncoder renderCommandEncoder)
+            => renderCommandEncoder.SetCullMode(_currentState.CullMode);
+
+        /// <summary>
+        /// Applies the winding order held in the current state as the encoder's front-facing winding.
+        /// </summary>
+        /// <param name="renderCommandEncoder">Encoder that receives the winding order</param>
+        private readonly void SetFrontFace(MTLRenderCommandEncoder renderCommandEncoder)
+            => renderCommandEncoder.SetFrontFacingWinding(_currentState.Winding);
+
+        /// <summary>
+        /// Applies the front and back stencil reference values held in the current state to the encoder.
+        /// </summary>
+        /// <param name="renderCommandEncoder">Encoder that receives the stencil reference values</param>
+        private readonly void SetStencilRefValue(MTLRenderCommandEncoder renderCommandEncoder)
+        {
+            uint frontReference = (uint)_currentState.FrontRefValue;
+            uint backReference = (uint)_currentState.BackRefValue;
+
+            renderCommandEncoder.SetStencilReferenceValues(frontReference, backReference);
+        }
+    }
+}

+ 293 - 0
src/Ryujinx.Graphics.Metal/EnumConversion.cs

@@ -0,0 +1,293 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    [SupportedOSPlatform("macos")]
+    static class EnumConversion
+    {
+        /// <summary>
+        /// Converts a GAL <see cref="AddressMode"/> to a Metal sampler address mode.
+        /// </summary>
+        /// <param name="mode">Address mode to convert</param>
+        /// <returns>
+        /// The mapped Metal address mode; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLSamplerAddressMode.ClampToEdge"/> as the fallback
+        /// </returns>
+        public static MTLSamplerAddressMode Convert(this AddressMode mode) => mode switch
+        {
+            AddressMode.Clamp => MTLSamplerAddressMode.ClampToEdge, // TODO: Should be clamp.
+            AddressMode.Repeat => MTLSamplerAddressMode.Repeat,
+            AddressMode.MirrorClamp => MTLSamplerAddressMode.MirrorClampToEdge, // TODO: Should be mirror clamp.
+            AddressMode.MirroredRepeat => MTLSamplerAddressMode.MirrorRepeat,
+            AddressMode.ClampToBorder => MTLSamplerAddressMode.ClampToBorderColor,
+            AddressMode.ClampToEdge => MTLSamplerAddressMode.ClampToEdge,
+            AddressMode.MirrorClampToEdge => MTLSamplerAddressMode.MirrorClampToEdge,
+            AddressMode.MirrorClampToBorder => MTLSamplerAddressMode.ClampToBorderColor, // TODO: Should be mirror clamp to border.
+            _ => LogInvalidAndReturn(mode, nameof(AddressMode), MTLSamplerAddressMode.ClampToEdge) // TODO: Should be clamp.
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="BlendFactor"/> to a Metal blend factor.
+        /// </summary>
+        /// <remarks>Each "Gl" variant maps to the same Metal factor as its plain counterpart.</remarks>
+        /// <param name="factor">Blend factor to convert</param>
+        /// <returns>
+        /// The mapped Metal blend factor; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLBlendFactor.Zero"/> as the fallback
+        /// </returns>
+        public static MTLBlendFactor Convert(this BlendFactor factor) => factor switch
+        {
+            BlendFactor.Zero or BlendFactor.ZeroGl => MTLBlendFactor.Zero,
+            BlendFactor.One or BlendFactor.OneGl => MTLBlendFactor.One,
+            BlendFactor.SrcColor or BlendFactor.SrcColorGl => MTLBlendFactor.SourceColor,
+            BlendFactor.OneMinusSrcColor or BlendFactor.OneMinusSrcColorGl => MTLBlendFactor.OneMinusSourceColor,
+            BlendFactor.SrcAlpha or BlendFactor.SrcAlphaGl => MTLBlendFactor.SourceAlpha,
+            BlendFactor.OneMinusSrcAlpha or BlendFactor.OneMinusSrcAlphaGl => MTLBlendFactor.OneMinusSourceAlpha,
+            BlendFactor.DstAlpha or BlendFactor.DstAlphaGl => MTLBlendFactor.DestinationAlpha,
+            BlendFactor.OneMinusDstAlpha or BlendFactor.OneMinusDstAlphaGl => MTLBlendFactor.OneMinusDestinationAlpha,
+            BlendFactor.DstColor or BlendFactor.DstColorGl => MTLBlendFactor.DestinationColor,
+            BlendFactor.OneMinusDstColor or BlendFactor.OneMinusDstColorGl => MTLBlendFactor.OneMinusDestinationColor,
+            BlendFactor.SrcAlphaSaturate or BlendFactor.SrcAlphaSaturateGl => MTLBlendFactor.SourceAlphaSaturated,
+            BlendFactor.Src1Color or BlendFactor.Src1ColorGl => MTLBlendFactor.Source1Color,
+            BlendFactor.OneMinusSrc1Color or BlendFactor.OneMinusSrc1ColorGl => MTLBlendFactor.OneMinusSource1Color,
+            BlendFactor.Src1Alpha or BlendFactor.Src1AlphaGl => MTLBlendFactor.Source1Alpha,
+            BlendFactor.OneMinusSrc1Alpha or BlendFactor.OneMinusSrc1AlphaGl => MTLBlendFactor.OneMinusSource1Alpha,
+            BlendFactor.ConstantColor => MTLBlendFactor.BlendColor,
+            BlendFactor.OneMinusConstantColor => MTLBlendFactor.OneMinusBlendColor,
+            BlendFactor.ConstantAlpha => MTLBlendFactor.BlendAlpha,
+            BlendFactor.OneMinusConstantAlpha => MTLBlendFactor.OneMinusBlendAlpha,
+            _ => LogInvalidAndReturn(factor, nameof(BlendFactor), MTLBlendFactor.Zero)
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="BlendOp"/> to a Metal blend operation.
+        /// </summary>
+        /// <param name="op">Blend operation to convert</param>
+        /// <returns>
+        /// The mapped Metal blend operation; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLBlendOperation.Add"/> as the fallback
+        /// </returns>
+        public static MTLBlendOperation Convert(this BlendOp op) => op switch
+        {
+            BlendOp.Add or BlendOp.AddGl => MTLBlendOperation.Add,
+            BlendOp.Subtract or BlendOp.SubtractGl => MTLBlendOperation.Subtract,
+            BlendOp.ReverseSubtract or BlendOp.ReverseSubtractGl => MTLBlendOperation.ReverseSubtract,
+            BlendOp.Minimum => MTLBlendOperation.Min,
+            BlendOp.Maximum => MTLBlendOperation.Max,
+            _ => LogInvalidAndReturn(op, nameof(BlendOp), MTLBlendOperation.Add)
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="CompareOp"/> to a Metal compare function.
+        /// </summary>
+        /// <param name="op">Comparison operation to convert</param>
+        /// <returns>
+        /// The mapped Metal compare function; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLCompareFunction.Never"/> as the fallback
+        /// </returns>
+        public static MTLCompareFunction Convert(this CompareOp op) => op switch
+        {
+            CompareOp.Never or CompareOp.NeverGl => MTLCompareFunction.Never,
+            CompareOp.Less or CompareOp.LessGl => MTLCompareFunction.Less,
+            CompareOp.Equal or CompareOp.EqualGl => MTLCompareFunction.Equal,
+            CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => MTLCompareFunction.LessEqual,
+            CompareOp.Greater or CompareOp.GreaterGl => MTLCompareFunction.Greater,
+            CompareOp.NotEqual or CompareOp.NotEqualGl => MTLCompareFunction.NotEqual,
+            CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => MTLCompareFunction.GreaterEqual,
+            CompareOp.Always or CompareOp.AlwaysGl => MTLCompareFunction.Always,
+            _ => LogInvalidAndReturn(op, nameof(CompareOp), MTLCompareFunction.Never)
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="Face"/> to a Metal cull mode.
+        /// </summary>
+        /// <param name="face">Face selection to convert</param>
+        /// <returns>
+        /// The mapped Metal cull mode; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLCullMode.Back"/> as the fallback
+        /// </returns>
+        public static MTLCullMode Convert(this Face face) => face switch
+        {
+            Face.Back => MTLCullMode.Back,
+            Face.Front => MTLCullMode.Front,
+            // NOTE(review): FrontAndBack becomes None here - confirm culling of both faces is handled elsewhere.
+            Face.FrontAndBack => MTLCullMode.None,
+            _ => LogInvalidAndReturn(face, nameof(Face), MTLCullMode.Back)
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="FrontFace"/> to a Metal winding order, swapping the two directions.
+        /// </summary>
+        /// <param name="frontFace">Front face orientation to convert</param>
+        /// <returns>
+        /// The opposite Metal winding; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLWinding.Clockwise"/> as the fallback
+        /// </returns>
+        public static MTLWinding Convert(this FrontFace frontFace) => frontFace switch
+        {
+            // The viewport is flipped vertically, so the winding order has to be switched too.
+            FrontFace.Clockwise => MTLWinding.CounterClockwise,
+            FrontFace.CounterClockwise => MTLWinding.Clockwise,
+            _ => LogInvalidAndReturn(frontFace, nameof(FrontFace), MTLWinding.Clockwise)
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="IndexType"/> to a Metal index type.
+        /// </summary>
+        /// <param name="type">Index type to convert</param>
+        /// <returns>
+        /// The mapped Metal index type; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLIndexType.UInt16"/> as the fallback
+        /// </returns>
+        public static MTLIndexType Convert(this IndexType type) => type switch
+        {
+            IndexType.UShort => MTLIndexType.UInt16,
+            IndexType.UInt => MTLIndexType.UInt32,
+            _ => LogInvalidAndReturn(type, nameof(IndexType), MTLIndexType.UInt16)
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="LogicalOp"/> to a Metal logic operation.
+        /// </summary>
+        /// <param name="op">Logical operation to convert</param>
+        /// <returns>
+        /// The mapped Metal logic operation; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLLogicOperation.And"/> as the fallback
+        /// </returns>
+        public static MTLLogicOperation Convert(this LogicalOp op) => op switch
+        {
+            LogicalOp.Clear => MTLLogicOperation.Clear,
+            LogicalOp.And => MTLLogicOperation.And,
+            LogicalOp.AndReverse => MTLLogicOperation.AndReverse,
+            LogicalOp.Copy => MTLLogicOperation.Copy,
+            LogicalOp.AndInverted => MTLLogicOperation.AndInverted,
+            LogicalOp.Noop => MTLLogicOperation.Noop,
+            LogicalOp.Xor => MTLLogicOperation.Xor,
+            LogicalOp.Or => MTLLogicOperation.Or,
+            LogicalOp.Nor => MTLLogicOperation.Nor,
+            LogicalOp.Equiv => MTLLogicOperation.Equivalence,
+            LogicalOp.Invert => MTLLogicOperation.Invert,
+            LogicalOp.OrReverse => MTLLogicOperation.OrReverse,
+            LogicalOp.CopyInverted => MTLLogicOperation.CopyInverted,
+            LogicalOp.OrInverted => MTLLogicOperation.OrInverted,
+            LogicalOp.Nand => MTLLogicOperation.Nand,
+            LogicalOp.Set => MTLLogicOperation.Set,
+            _ => LogInvalidAndReturn(op, nameof(LogicalOp), MTLLogicOperation.And)
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="MagFilter"/> to a Metal min/mag sampler filter.
+        /// </summary>
+        /// <param name="filter">Magnification filter to convert</param>
+        /// <returns>
+        /// The mapped Metal filter; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLSamplerMinMagFilter.Nearest"/> as the fallback
+        /// </returns>
+        public static MTLSamplerMinMagFilter Convert(this MagFilter filter) => filter switch
+        {
+            MagFilter.Nearest => MTLSamplerMinMagFilter.Nearest,
+            MagFilter.Linear => MTLSamplerMinMagFilter.Linear,
+            _ => LogInvalidAndReturn(filter, nameof(MagFilter), MTLSamplerMinMagFilter.Nearest)
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="MinFilter"/> to a pair of Metal minification and mip filters.
+        /// </summary>
+        /// <param name="filter">Minification filter to convert</param>
+        /// <returns>
+        /// A tuple of (min filter, mip filter); unrecognized values go through LogInvalidAndReturn
+        /// with (Nearest, Nearest) as the fallback
+        /// </returns>
+        public static (MTLSamplerMinMagFilter, MTLSamplerMipFilter) Convert(this MinFilter filter) => filter switch
+        {
+            // NOTE(review): the plain Nearest/Linear filters also pick a mip filter - confirm that
+            // MTLSamplerMipFilter.NotMipmapped is not the intended value for them.
+            MinFilter.Nearest => (MTLSamplerMinMagFilter.Nearest, MTLSamplerMipFilter.Nearest),
+            MinFilter.Linear => (MTLSamplerMinMagFilter.Linear, MTLSamplerMipFilter.Linear),
+            MinFilter.NearestMipmapNearest => (MTLSamplerMinMagFilter.Nearest, MTLSamplerMipFilter.Nearest),
+            MinFilter.LinearMipmapNearest => (MTLSamplerMinMagFilter.Linear, MTLSamplerMipFilter.Nearest),
+            MinFilter.NearestMipmapLinear => (MTLSamplerMinMagFilter.Nearest, MTLSamplerMipFilter.Linear),
+            MinFilter.LinearMipmapLinear => (MTLSamplerMinMagFilter.Linear, MTLSamplerMipFilter.Linear),
+            _ => LogInvalidAndReturn(filter, nameof(MinFilter), (MTLSamplerMinMagFilter.Nearest, MTLSamplerMipFilter.Nearest))
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="PrimitiveTopology"/> to a Metal primitive type.
+        /// </summary>
+        /// <param name="topology">Primitive topology to convert</param>
+        /// <returns>
+        /// The mapped Metal primitive type; topologies not listed here go through LogInvalidAndReturn
+        /// with <see cref="MTLPrimitiveType.Triangle"/> as the fallback
+        /// </returns>
+        public static MTLPrimitiveType Convert(this PrimitiveTopology topology) => topology switch
+        {
+            PrimitiveTopology.Points => MTLPrimitiveType.Point,
+            PrimitiveTopology.Lines => MTLPrimitiveType.Line,
+            PrimitiveTopology.LineStrip => MTLPrimitiveType.LineStrip,
+            PrimitiveTopology.Triangles => MTLPrimitiveType.Triangle,
+            PrimitiveTopology.TriangleStrip => MTLPrimitiveType.TriangleStrip,
+            _ => LogInvalidAndReturn(topology, nameof(PrimitiveTopology), MTLPrimitiveType.Triangle)
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="StencilOp"/> to a Metal stencil operation.
+        /// </summary>
+        /// <remarks>Each "Gl" variant maps to the same Metal operation as its plain counterpart.</remarks>
+        /// <param name="op">Stencil operation to convert</param>
+        /// <returns>
+        /// The mapped Metal stencil operation; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLStencilOperation.Keep"/> as the fallback
+        /// </returns>
+        public static MTLStencilOperation Convert(this StencilOp op) => op switch
+        {
+            StencilOp.Keep or StencilOp.KeepGl => MTLStencilOperation.Keep,
+            StencilOp.Zero or StencilOp.ZeroGl => MTLStencilOperation.Zero,
+            StencilOp.Replace or StencilOp.ReplaceGl => MTLStencilOperation.Replace,
+            StencilOp.IncrementAndClamp or StencilOp.IncrementAndClampGl => MTLStencilOperation.IncrementClamp,
+            StencilOp.DecrementAndClamp or StencilOp.DecrementAndClampGl => MTLStencilOperation.DecrementClamp,
+            StencilOp.Invert or StencilOp.InvertGl => MTLStencilOperation.Invert,
+            StencilOp.IncrementAndWrap or StencilOp.IncrementAndWrapGl => MTLStencilOperation.IncrementWrap,
+            StencilOp.DecrementAndWrap or StencilOp.DecrementAndWrapGl => MTLStencilOperation.DecrementWrap,
+            _ => LogInvalidAndReturn(op, nameof(StencilOp), MTLStencilOperation.Keep)
+        };
+
+        /// <summary>
+        /// Converts a GAL texture <see cref="Target"/> to a Metal texture type.
+        /// </summary>
+        /// <param name="target">Texture target to convert</param>
+        /// <returns>
+        /// The mapped Metal texture type; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLTextureType.Type2D"/> as the fallback
+        /// </returns>
+        public static MTLTextureType Convert(this Target target) => target switch
+        {
+            Target.TextureBuffer => MTLTextureType.TextureBuffer,
+            Target.Texture1D => MTLTextureType.Type1D,
+            Target.Texture1DArray => MTLTextureType.Type1DArray,
+            Target.Texture2D => MTLTextureType.Type2D,
+            Target.Texture2DArray => MTLTextureType.Type2DArray,
+            Target.Texture2DMultisample => MTLTextureType.Type2DMultisample,
+            Target.Texture2DMultisampleArray => MTLTextureType.Type2DMultisampleArray,
+            Target.Texture3D => MTLTextureType.Type3D,
+            Target.Cubemap => MTLTextureType.Cube,
+            Target.CubemapArray => MTLTextureType.CubeArray,
+            _ => LogInvalidAndReturn(target, nameof(Target), MTLTextureType.Type2D)
+        };
+
+        /// <summary>
+        /// Converts a GAL <see cref="SwizzleComponent"/> to a Metal texture swizzle.
+        /// </summary>
+        /// <param name="swizzleComponent">Swizzle component to convert</param>
+        /// <returns>
+        /// The mapped Metal swizzle; unrecognized values go through LogInvalidAndReturn
+        /// with <see cref="MTLTextureSwizzle.Zero"/> as the fallback
+        /// </returns>
+        public static MTLTextureSwizzle Convert(this SwizzleComponent swizzleComponent) => swizzleComponent switch
+        {
+            SwizzleComponent.Zero => MTLTextureSwizzle.Zero,
+            SwizzleComponent.One => MTLTextureSwizzle.One,
+            SwizzleComponent.Red => MTLTextureSwizzle.Red,
+            SwizzleComponent.Green => MTLTextureSwizzle.Green,
+            SwizzleComponent.Blue => MTLTextureSwizzle.Blue,
+            SwizzleComponent.Alpha => MTLTextureSwizzle.Alpha,
+            _ => LogInvalidAndReturn(swizzleComponent, nameof(SwizzleComponent), MTLTextureSwizzle.Zero)
+        };
+
+        /// <summary>
+        /// Translates a GAL <see cref="Format"/> into the Metal vertex attribute format with the same layout.
+        /// </summary>
+        /// <param name="format">Vertex attribute format to translate</param>
+        /// <returns>The matching Metal vertex format; formats without a mapping are logged and mapped to Float4</returns>
+        public static MTLVertexFormat Convert(this Format format)
+        {
+            switch (format)
+            {
+                // Floating point
+                case Format.R16Float: return MTLVertexFormat.Half;
+                case Format.R16G16Float: return MTLVertexFormat.Half2;
+                case Format.R16G16B16Float: return MTLVertexFormat.Half3;
+                case Format.R16G16B16A16Float: return MTLVertexFormat.Half4;
+                case Format.R32Float: return MTLVertexFormat.Float;
+                case Format.R32G32Float: return MTLVertexFormat.Float2;
+                case Format.R32G32B32Float: return MTLVertexFormat.Float3;
+                case Format.R11G11B10Float: return MTLVertexFormat.FloatRG11B10;
+                case Format.R32G32B32A32Float: return MTLVertexFormat.Float4;
+
+                // Unsigned integer
+                case Format.R8Uint: return MTLVertexFormat.UChar;
+                case Format.R8G8Uint: return MTLVertexFormat.UChar2;
+                case Format.R8G8B8Uint: return MTLVertexFormat.UChar3;
+                case Format.R8G8B8A8Uint: return MTLVertexFormat.UChar4;
+                case Format.R16Uint: return MTLVertexFormat.UShort;
+                case Format.R16G16Uint: return MTLVertexFormat.UShort2;
+                case Format.R16G16B16Uint: return MTLVertexFormat.UShort3;
+                case Format.R16G16B16A16Uint: return MTLVertexFormat.UShort4;
+                case Format.R32Uint: return MTLVertexFormat.UInt;
+                case Format.R32G32Uint: return MTLVertexFormat.UInt2;
+                case Format.R32G32B32Uint: return MTLVertexFormat.UInt3;
+                case Format.R32G32B32A32Uint: return MTLVertexFormat.UInt4;
+
+                // Signed integer
+                case Format.R8Sint: return MTLVertexFormat.Char;
+                case Format.R8G8Sint: return MTLVertexFormat.Char2;
+                case Format.R8G8B8Sint: return MTLVertexFormat.Char3;
+                case Format.R8G8B8A8Sint: return MTLVertexFormat.Char4;
+                case Format.R16Sint: return MTLVertexFormat.Short;
+                case Format.R16G16Sint: return MTLVertexFormat.Short2;
+                case Format.R16G16B16Sint: return MTLVertexFormat.Short3;
+                case Format.R16G16B16A16Sint: return MTLVertexFormat.Short4;
+                case Format.R32Sint: return MTLVertexFormat.Int;
+                case Format.R32G32Sint: return MTLVertexFormat.Int2;
+                case Format.R32G32B32Sint: return MTLVertexFormat.Int3;
+                case Format.R32G32B32A32Sint: return MTLVertexFormat.Int4;
+
+                // Unsigned normalized
+                case Format.R8Unorm: return MTLVertexFormat.UCharNormalized;
+                case Format.R8G8Unorm: return MTLVertexFormat.UChar2Normalized;
+                case Format.R8G8B8Unorm: return MTLVertexFormat.UChar3Normalized;
+                case Format.R8G8B8A8Unorm: return MTLVertexFormat.UChar4Normalized;
+                case Format.R16Unorm: return MTLVertexFormat.UShortNormalized;
+                case Format.R16G16Unorm: return MTLVertexFormat.UShort2Normalized;
+                case Format.R16G16B16Unorm: return MTLVertexFormat.UShort3Normalized;
+                case Format.R16G16B16A16Unorm: return MTLVertexFormat.UShort4Normalized;
+                case Format.R10G10B10A2Unorm: return MTLVertexFormat.UInt1010102Normalized;
+
+                // Signed normalized
+                case Format.R8Snorm: return MTLVertexFormat.CharNormalized;
+                case Format.R8G8Snorm: return MTLVertexFormat.Char2Normalized;
+                case Format.R8G8B8Snorm: return MTLVertexFormat.Char3Normalized;
+                case Format.R8G8B8A8Snorm: return MTLVertexFormat.Char4Normalized;
+                case Format.R16Snorm: return MTLVertexFormat.ShortNormalized;
+                case Format.R16G16Snorm: return MTLVertexFormat.Short2Normalized;
+                case Format.R16G16B16Snorm: return MTLVertexFormat.Short3Normalized;
+                case Format.R16G16B16A16Snorm: return MTLVertexFormat.Short4Normalized;
+                case Format.R10G10B10A2Snorm: return MTLVertexFormat.Int1010102Normalized;
+
+                default:
+                    return LogInvalidAndReturn(format, nameof(Format), MTLVertexFormat.Float4);
+            }
+        }
+
+        /// <summary>
+        /// Writes a debug-level log entry for an enum value that has no mapping and returns the supplied fallback.
+        /// </summary>
+        /// <typeparam name="T1">Type of the value that could not be converted</typeparam>
+        /// <typeparam name="T2">Type of the conversion result</typeparam>
+        /// <param name="value">Value that could not be converted; included in the log message</param>
+        /// <param name="name">Name of the source enum type; included in the log message</param>
+        /// <param name="defaultValue">Value to return in place of a real conversion</param>
+        /// <returns><paramref name="defaultValue"/>, unchanged</returns>
+        private static T2 LogInvalidAndReturn<T1, T2>(T1 value, string name, T2 defaultValue = default)
+        {
+            // The null-conditional call means the message is only formatted when Logger.Debug is non-null.
+            Logger.Debug?.Print(LogClass.Gpu, $"Invalid {name} enum value: {value}.");
+
+            return defaultValue;
+        }
+    }
+}

+ 77 - 0
src/Ryujinx.Graphics.Metal/FenceHolder.cs

@@ -0,0 +1,77 @@
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Reference-counted wrapper around a Metal command buffer that is used as a fence.
+    /// The holder starts with one reference; once the count reaches zero the stored handle is reset to default.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class FenceHolder : IDisposable
+    {
+        private MTLCommandBuffer _fence;
+        private int _referenceCount;
+        private bool _disposed;
+
+        /// <summary>
+        /// Creates a holder for <paramref name="fence"/> with an initial reference count of one.
+        /// </summary>
+        /// <param name="fence">Command buffer to wrap</param>
+        public FenceHolder(MTLCommandBuffer fence)
+        {
+            _referenceCount = 1;
+            _fence = fence;
+        }
+
+        /// <summary>
+        /// Returns the stored command buffer without touching the reference count.
+        /// </summary>
+        public MTLCommandBuffer GetUnsafe() => _fence;
+
+        /// <summary>
+        /// Tries to take a reference on the fence. Fails once the reference count has dropped to zero.
+        /// </summary>
+        /// <param name="fence">The stored command buffer on success, default otherwise</param>
+        /// <returns>True if a reference was taken, false if the count was already zero</returns>
+        public bool TryGet(out MTLCommandBuffer fence)
+        {
+            while (true)
+            {
+                int observed = _referenceCount;
+
+                if (observed == 0)
+                {
+                    fence = default;
+                    return false;
+                }
+
+                // Only bump the count if nobody changed it since it was read; otherwise retry.
+                if (Interlocked.CompareExchange(ref _referenceCount, observed + 1, observed) == observed)
+                {
+                    fence = _fence;
+                    return true;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Takes a reference unconditionally and returns the stored command buffer.
+        /// </summary>
+        public MTLCommandBuffer Get()
+        {
+            Interlocked.Increment(ref _referenceCount);
+
+            return _fence;
+        }
+
+        /// <summary>
+        /// Drops one reference; the stored handle is reset to default when the last reference goes away.
+        /// </summary>
+        public void Put()
+        {
+            int remaining = Interlocked.Decrement(ref _referenceCount);
+
+            if (remaining == 0)
+            {
+                _fence = default;
+            }
+        }
+
+        /// <summary>
+        /// Blocks by calling WaitUntilCompleted on the stored command buffer.
+        /// </summary>
+        public void Wait() => _fence.WaitUntilCompleted();
+
+        /// <summary>
+        /// Reports whether the stored command buffer's status is Completed.
+        /// </summary>
+        public bool IsSignaled() => _fence.Status == MTLCommandBufferStatus.Completed;
+
+        /// <summary>
+        /// Releases the reference taken by the constructor. Calls after the first have no effect.
+        /// </summary>
+        public void Dispose()
+        {
+            if (_disposed)
+            {
+                return;
+            }
+
+            Put();
+            _disposed = true;
+        }
+    }
+}

+ 49 - 0
src/Ryujinx.Graphics.Metal/FormatConverter.cs

@@ -0,0 +1,49 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// CPU-side conversion between packed D24S8 texels (one 32-bit word each) and
+    /// D32FS8 texels (two 32-bit words each: float depth followed by stencil).
+    /// </summary>
+    class FormatConverter
+    {
+        /// <summary>
+        /// Expands D24S8 texels into D32FS8 texels.
+        /// </summary>
+        /// <param name="output">Destination; must hold two 32-bit words for every 32-bit word in <paramref name="input"/></param>
+        /// <param name="input">Source texels, depth in the upper 24 bits and stencil in the lower 8 bits of each word</param>
+        public static void ConvertD24S8ToD32FS8(Span<byte> output, ReadOnlySpan<byte> input)
+        {
+            const float UnormToFloat = 1f / 0xffffff;
+
+            Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
+            ReadOnlySpan<uint> inputUint = MemoryMarshal.Cast<byte, uint>(input);
+
+            for (int i = 0; i < inputUint.Length; i++)
+            {
+                uint depthStencil = inputUint[i];
+                uint depth = depthStencil >> 8;
+                uint stencil = depthStencil & 0xff;
+
+                // Each source word produces two destination words.
+                int j = i * 2;
+
+                outputUint[j] = (uint)BitConverter.SingleToInt32Bits(depth * UnormToFloat);
+                outputUint[j + 1] = stencil;
+            }
+        }
+
+        /// <summary>
+        /// Packs D32FS8 texels into D24S8 texels.
+        /// Depth values outside [0, 1] are clamped to that range and NaN is written as 0.
+        /// </summary>
+        /// <param name="output">Destination; must hold one 32-bit word for every two 32-bit words in <paramref name="input"/></param>
+        /// <param name="input">Source texels, a float depth word followed by a stencil word</param>
+        public static void ConvertD32FS8ToD24S8(Span<byte> output, ReadOnlySpan<byte> input)
+        {
+            const uint MaxDepth = 0xffffff;
+
+            Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
+            ReadOnlySpan<uint> inputUint = MemoryMarshal.Cast<byte, uint>(input);
+
+            for (int i = 0; i < inputUint.Length; i += 2)
+            {
+                float depth = BitConverter.Int32BitsToSingle((int)inputUint[i]);
+                uint stencil = inputUint[i + 1];
+
+                // Clamp while still in floating point. Converting a negative, NaN or very large
+                // float to uint gives a platform-dependent result, so clamping after the cast
+                // could turn a negative depth into the maximum depth.
+                float clamped = Math.Clamp(depth, 0f, 1f);
+                uint depthUnorm = float.IsNaN(clamped) ? 0u : (uint)(clamped * MaxDepth);
+
+                uint depthStencil = (depthUnorm << 8) | (stencil & 0xff);
+
+                // Two source words collapse into one destination word.
+                int j = i >> 1;
+
+                outputUint[j] = depthStencil;
+            }
+        }
+    }
+}

+ 196 - 0
src/Ryujinx.Graphics.Metal/FormatTable.cs

@@ -0,0 +1,196 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Lookup table from GAL <see cref="Format"/> values to Metal pixel formats.
+    /// Formats that are never registered keep the array's default value and are
+    /// reported as unsupported by <see cref="GetFormat"/>.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    static class FormatTable
+    {
+        private static readonly MTLPixelFormat[] _table;
+
+        // Evaluated once on first use so that GetFormat does not call
+        // CreateSystemDefaultDevice() for every depth-stencil lookup.
+        private static readonly Lazy<bool> _depth24Stencil8Supported =
+            new(() => MTLDevice.CreateSystemDefaultDevice().Depth24Stencil8PixelFormatSupported);
+
+        static FormatTable()
+        {
+            _table = new MTLPixelFormat[Enum.GetNames(typeof(Format)).Length];
+
+            Add(Format.R8Unorm, MTLPixelFormat.R8Unorm);
+            Add(Format.R8Snorm, MTLPixelFormat.R8Snorm);
+            Add(Format.R8Uint, MTLPixelFormat.R8Uint);
+            Add(Format.R8Sint, MTLPixelFormat.R8Sint);
+            Add(Format.R16Float, MTLPixelFormat.R16Float);
+            Add(Format.R16Unorm, MTLPixelFormat.R16Unorm);
+            Add(Format.R16Snorm, MTLPixelFormat.R16Snorm);
+            Add(Format.R16Uint, MTLPixelFormat.R16Uint);
+            Add(Format.R16Sint, MTLPixelFormat.R16Sint);
+            Add(Format.R32Float, MTLPixelFormat.R32Float);
+            Add(Format.R32Uint, MTLPixelFormat.R32Uint);
+            Add(Format.R32Sint, MTLPixelFormat.R32Sint);
+            Add(Format.R8G8Unorm, MTLPixelFormat.RG8Unorm);
+            Add(Format.R8G8Snorm, MTLPixelFormat.RG8Snorm);
+            Add(Format.R8G8Uint, MTLPixelFormat.RG8Uint);
+            Add(Format.R8G8Sint, MTLPixelFormat.RG8Sint);
+            Add(Format.R16G16Float, MTLPixelFormat.RG16Float);
+            Add(Format.R16G16Unorm, MTLPixelFormat.RG16Unorm);
+            Add(Format.R16G16Snorm, MTLPixelFormat.RG16Snorm);
+            Add(Format.R16G16Uint, MTLPixelFormat.RG16Uint);
+            Add(Format.R16G16Sint, MTLPixelFormat.RG16Sint);
+            Add(Format.R32G32Float, MTLPixelFormat.RG32Float);
+            Add(Format.R32G32Uint, MTLPixelFormat.RG32Uint);
+            Add(Format.R32G32Sint, MTLPixelFormat.RG32Sint);
+            // Add(Format.R8G8B8Unorm,         MTLPixelFormat.R8G8B8Unorm);
+            // Add(Format.R8G8B8Snorm,         MTLPixelFormat.R8G8B8Snorm);
+            // Add(Format.R8G8B8Uint,          MTLPixelFormat.R8G8B8Uint);
+            // Add(Format.R8G8B8Sint,          MTLPixelFormat.R8G8B8Sint);
+            // Add(Format.R16G16B16Float,      MTLPixelFormat.R16G16B16Float);
+            // Add(Format.R16G16B16Unorm,      MTLPixelFormat.R16G16B16Unorm);
+            // Add(Format.R16G16B16Snorm,      MTLPixelFormat.R16G16B16SNorm);
+            // Add(Format.R16G16B16Uint,       MTLPixelFormat.R16G16B16Uint);
+            // Add(Format.R16G16B16Sint,       MTLPixelFormat.R16G16B16Sint);
+            // Add(Format.R32G32B32Float,      MTLPixelFormat.R32G32B32Sfloat);
+            // Add(Format.R32G32B32Uint,       MTLPixelFormat.R32G32B32Uint);
+            // Add(Format.R32G32B32Sint,       MTLPixelFormat.R32G32B32Sint);
+            Add(Format.R8G8B8A8Unorm, MTLPixelFormat.RGBA8Unorm);
+            Add(Format.R8G8B8A8Snorm, MTLPixelFormat.RGBA8Snorm);
+            Add(Format.R8G8B8A8Uint, MTLPixelFormat.RGBA8Uint);
+            Add(Format.R8G8B8A8Sint, MTLPixelFormat.RGBA8Sint);
+            Add(Format.R16G16B16A16Float, MTLPixelFormat.RGBA16Float);
+            Add(Format.R16G16B16A16Unorm, MTLPixelFormat.RGBA16Unorm);
+            Add(Format.R16G16B16A16Snorm, MTLPixelFormat.RGBA16Snorm);
+            Add(Format.R16G16B16A16Uint, MTLPixelFormat.RGBA16Uint);
+            Add(Format.R16G16B16A16Sint, MTLPixelFormat.RGBA16Sint);
+            Add(Format.R32G32B32A32Float, MTLPixelFormat.RGBA32Float);
+            Add(Format.R32G32B32A32Uint, MTLPixelFormat.RGBA32Uint);
+            Add(Format.R32G32B32A32Sint, MTLPixelFormat.RGBA32Sint);
+            Add(Format.S8Uint, MTLPixelFormat.Stencil8);
+            Add(Format.D16Unorm, MTLPixelFormat.Depth16Unorm);
+            Add(Format.S8UintD24Unorm, MTLPixelFormat.Depth24UnormStencil8);
+            Add(Format.X8UintD24Unorm, MTLPixelFormat.Depth24UnormStencil8);
+            Add(Format.D32Float, MTLPixelFormat.Depth32Float);
+            Add(Format.D24UnormS8Uint, MTLPixelFormat.Depth24UnormStencil8);
+            Add(Format.D32FloatS8Uint, MTLPixelFormat.Depth32FloatStencil8);
+            Add(Format.R8G8B8A8Srgb, MTLPixelFormat.RGBA8UnormsRGB);
+            // Add(Format.R4G4Unorm,           MTLPixelFormat.R4G4Unorm);
+            Add(Format.R4G4B4A4Unorm, MTLPixelFormat.RGBA8Unorm);
+            // Add(Format.R5G5B5X1Unorm,       MTLPixelFormat.R5G5B5X1Unorm);
+            Add(Format.R5G5B5A1Unorm, MTLPixelFormat.BGR5A1Unorm);
+            Add(Format.R5G6B5Unorm, MTLPixelFormat.B5G6R5Unorm);
+            Add(Format.R10G10B10A2Unorm, MTLPixelFormat.RGB10A2Unorm);
+            Add(Format.R10G10B10A2Uint, MTLPixelFormat.RGB10A2Uint);
+            Add(Format.R11G11B10Float, MTLPixelFormat.RG11B10Float);
+            Add(Format.R9G9B9E5Float, MTLPixelFormat.RGB9E5Float);
+            Add(Format.Bc1RgbaUnorm, MTLPixelFormat.BC1RGBA);
+            Add(Format.Bc2Unorm, MTLPixelFormat.BC2RGBA);
+            Add(Format.Bc3Unorm, MTLPixelFormat.BC3RGBA);
+            Add(Format.Bc1RgbaSrgb, MTLPixelFormat.BC1RGBAsRGB);
+            Add(Format.Bc2Srgb, MTLPixelFormat.BC2RGBAsRGB);
+            Add(Format.Bc3Srgb, MTLPixelFormat.BC3RGBAsRGB);
+            Add(Format.Bc4Unorm, MTLPixelFormat.BC4RUnorm);
+            Add(Format.Bc4Snorm, MTLPixelFormat.BC4RSnorm);
+            Add(Format.Bc5Unorm, MTLPixelFormat.BC5RGUnorm);
+            Add(Format.Bc5Snorm, MTLPixelFormat.BC5RGSnorm);
+            Add(Format.Bc7Unorm, MTLPixelFormat.BC7RGBAUnorm);
+            Add(Format.Bc7Srgb, MTLPixelFormat.BC7RGBAUnormsRGB);
+            Add(Format.Bc6HSfloat, MTLPixelFormat.BC6HRGBFloat);
+            Add(Format.Bc6HUfloat, MTLPixelFormat.BC6HRGBUfloat);
+            Add(Format.Etc2RgbUnorm, MTLPixelFormat.ETC2RGB8);
+            // Add(Format.Etc2RgbaUnorm, MTLPixelFormat.ETC2RGBA8);
+            Add(Format.Etc2RgbPtaUnorm, MTLPixelFormat.ETC2RGB8A1);
+            Add(Format.Etc2RgbSrgb, MTLPixelFormat.ETC2RGB8sRGB);
+            // Add(Format.Etc2RgbaSrgb, MTLPixelFormat.ETC2RGBA8sRGB);
+            Add(Format.Etc2RgbPtaSrgb, MTLPixelFormat.ETC2RGB8A1sRGB);
+            // Add(Format.R8Uscaled,           MTLPixelFormat.R8Uscaled);
+            // Add(Format.R8Sscaled,           MTLPixelFormat.R8Sscaled);
+            // Add(Format.R16Uscaled,          MTLPixelFormat.R16Uscaled);
+            // Add(Format.R16Sscaled,          MTLPixelFormat.R16Sscaled);
+            // Add(Format.R32Uscaled,          MTLPixelFormat.R32Uscaled);
+            // Add(Format.R32Sscaled,          MTLPixelFormat.R32Sscaled);
+            // Add(Format.R8G8Uscaled,         MTLPixelFormat.R8G8Uscaled);
+            // Add(Format.R8G8Sscaled,         MTLPixelFormat.R8G8Sscaled);
+            // Add(Format.R16G16Uscaled,       MTLPixelFormat.R16G16Uscaled);
+            // Add(Format.R16G16Sscaled,       MTLPixelFormat.R16G16Sscaled);
+            // Add(Format.R32G32Uscaled,       MTLPixelFormat.R32G32Uscaled);
+            // Add(Format.R32G32Sscaled,       MTLPixelFormat.R32G32Sscaled);
+            // Add(Format.R8G8B8Uscaled,       MTLPixelFormat.R8G8B8Uscaled);
+            // Add(Format.R8G8B8Sscaled,       MTLPixelFormat.R8G8B8Sscaled);
+            // Add(Format.R16G16B16Uscaled,    MTLPixelFormat.R16G16B16Uscaled);
+            // Add(Format.R16G16B16Sscaled,    MTLPixelFormat.R16G16B16Sscaled);
+            // Add(Format.R32G32B32Uscaled,    MTLPixelFormat.R32G32B32Uscaled);
+            // Add(Format.R32G32B32Sscaled,    MTLPixelFormat.R32G32B32Sscaled);
+            // Add(Format.R8G8B8A8Uscaled,     MTLPixelFormat.R8G8B8A8Uscaled);
+            // Add(Format.R8G8B8A8Sscaled,     MTLPixelFormat.R8G8B8A8Sscaled);
+            // Add(Format.R16G16B16A16Uscaled, MTLPixelFormat.R16G16B16A16Uscaled);
+            // Add(Format.R16G16B16A16Sscaled, MTLPixelFormat.R16G16B16A16Sscaled);
+            // Add(Format.R32G32B32A32Uscaled, MTLPixelFormat.R32G32B32A32Uscaled);
+            // Add(Format.R32G32B32A32Sscaled, MTLPixelFormat.R32G32B32A32Sscaled);
+            // Add(Format.R10G10B10A2Snorm,    MTLPixelFormat.A2B10G10R10SNormPack32);
+            // Add(Format.R10G10B10A2Sint,     MTLPixelFormat.A2B10G10R10SintPack32);
+            // Add(Format.R10G10B10A2Uscaled,  MTLPixelFormat.A2B10G10R10UscaledPack32);
+            // Add(Format.R10G10B10A2Sscaled,  MTLPixelFormat.A2B10G10R10SscaledPack32);
+            Add(Format.Astc4x4Unorm, MTLPixelFormat.ASTC4x4LDR);
+            Add(Format.Astc5x4Unorm, MTLPixelFormat.ASTC5x4LDR);
+            Add(Format.Astc5x5Unorm, MTLPixelFormat.ASTC5x5LDR);
+            Add(Format.Astc6x5Unorm, MTLPixelFormat.ASTC6x5LDR);
+            Add(Format.Astc6x6Unorm, MTLPixelFormat.ASTC6x6LDR);
+            Add(Format.Astc8x5Unorm, MTLPixelFormat.ASTC8x5LDR);
+            Add(Format.Astc8x6Unorm, MTLPixelFormat.ASTC8x6LDR);
+            Add(Format.Astc8x8Unorm, MTLPixelFormat.ASTC8x8LDR);
+            Add(Format.Astc10x5Unorm, MTLPixelFormat.ASTC10x5LDR);
+            Add(Format.Astc10x6Unorm, MTLPixelFormat.ASTC10x6LDR);
+            Add(Format.Astc10x8Unorm, MTLPixelFormat.ASTC10x8LDR);
+            Add(Format.Astc10x10Unorm, MTLPixelFormat.ASTC10x10LDR);
+            Add(Format.Astc12x10Unorm, MTLPixelFormat.ASTC12x10LDR);
+            Add(Format.Astc12x12Unorm, MTLPixelFormat.ASTC12x12LDR);
+            Add(Format.Astc4x4Srgb, MTLPixelFormat.ASTC4x4sRGB);
+            Add(Format.Astc5x4Srgb, MTLPixelFormat.ASTC5x4sRGB);
+            Add(Format.Astc5x5Srgb, MTLPixelFormat.ASTC5x5sRGB);
+            Add(Format.Astc6x5Srgb, MTLPixelFormat.ASTC6x5sRGB);
+            Add(Format.Astc6x6Srgb, MTLPixelFormat.ASTC6x6sRGB);
+            Add(Format.Astc8x5Srgb, MTLPixelFormat.ASTC8x5sRGB);
+            Add(Format.Astc8x6Srgb, MTLPixelFormat.ASTC8x6sRGB);
+            Add(Format.Astc8x8Srgb, MTLPixelFormat.ASTC8x8sRGB);
+            Add(Format.Astc10x5Srgb, MTLPixelFormat.ASTC10x5sRGB);
+            Add(Format.Astc10x6Srgb, MTLPixelFormat.ASTC10x6sRGB);
+            Add(Format.Astc10x8Srgb, MTLPixelFormat.ASTC10x8sRGB);
+            Add(Format.Astc10x10Srgb, MTLPixelFormat.ASTC10x10sRGB);
+            Add(Format.Astc12x10Srgb, MTLPixelFormat.ASTC12x10sRGB);
+            Add(Format.Astc12x12Srgb, MTLPixelFormat.ASTC12x12sRGB);
+            Add(Format.B5G6R5Unorm, MTLPixelFormat.B5G6R5Unorm);
+            Add(Format.B5G5R5A1Unorm, MTLPixelFormat.BGR5A1Unorm);
+            Add(Format.A1B5G5R5Unorm, MTLPixelFormat.A1BGR5Unorm);
+            Add(Format.B8G8R8A8Unorm, MTLPixelFormat.BGRA8Unorm);
+            Add(Format.B8G8R8A8Srgb, MTLPixelFormat.BGRA8UnormsRGB);
+        }
+
+        /// <summary>
+        /// Registers the Metal pixel format used for a GAL format.
+        /// </summary>
+        /// <param name="format">GAL format, used as the table index</param>
+        /// <param name="mtlFormat">Metal pixel format to store</param>
+        private static void Add(Format format, MTLPixelFormat mtlFormat)
+        {
+            _table[(int)format] = mtlFormat;
+        }
+
+        /// <summary>
+        /// Looks up the Metal pixel format for a GAL format.
+        /// D24S8-class formats are replaced by Depth32FloatStencil8 when the default device
+        /// reports that Depth24UnormStencil8 is not supported.
+        /// </summary>
+        /// <param name="format">GAL format to look up</param>
+        /// <returns>
+        /// The Metal pixel format, or <see cref="MTLPixelFormat.Invalid"/> (after logging an error)
+        /// when no mapping is registered
+        /// </returns>
+        public static MTLPixelFormat GetFormat(Format format)
+        {
+            var mtlFormat = _table[(int)format];
+
+            if (IsD24S8(format) && !_depth24Stencil8Supported.Value)
+            {
+                mtlFormat = MTLPixelFormat.Depth32FloatStencil8;
+            }
+
+            if (mtlFormat == MTLPixelFormat.Invalid)
+            {
+                Logger.Error?.PrintMsg(LogClass.Gpu, $"Format {format} is not supported by the host.");
+            }
+
+            return mtlFormat;
+        }
+
+        /// <summary>
+        /// Checks whether a GAL format is one of the 24-bit depth / 8-bit stencil (or padding) formats.
+        /// </summary>
+        /// <param name="format">GAL format to test</param>
+        /// <returns>True for D24UnormS8Uint, S8UintD24Unorm and X8UintD24Unorm</returns>
+        public static bool IsD24S8(Format format)
+        {
+            return format == Format.D24UnormS8Uint || format == Format.S8UintD24Unorm || format == Format.X8UintD24Unorm;
+        }
+    }
+}

+ 82 - 0
src/Ryujinx.Graphics.Metal/HardwareInfo.cs

@@ -0,0 +1,82 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Queries the "IOGPU" IOKit service for the GPU vendor and model name.
+    /// </summary>
+    static partial class HardwareInfoTools
+    {
+        // Size in bytes of the native buffer that receives the model name.
+        private const int ModelBufferSize = 64;
+
+        private readonly static IntPtr _kCFAllocatorDefault = IntPtr.Zero;
+        private readonly static UInt32 _kCFStringEncodingASCII = 0x0600;
+        private const string IOKit = "/System/Library/Frameworks/IOKit.framework/IOKit";
+        private const string CoreFoundation = "/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation";
+
+        [LibraryImport(IOKit, StringMarshalling = StringMarshalling.Utf8)]
+        private static partial IntPtr IOServiceMatching(string name);
+
+        [LibraryImport(IOKit)]
+        private static partial IntPtr IOServiceGetMatchingService(IntPtr mainPort, IntPtr matching);
+
+        [LibraryImport(IOKit)]
+        private static partial IntPtr IORegistryEntryCreateCFProperty(IntPtr entry, IntPtr key, IntPtr allocator, UInt32 options);
+
+        [LibraryImport(CoreFoundation, StringMarshalling = StringMarshalling.Utf8)]
+        private static partial IntPtr CFStringCreateWithCString(IntPtr allocator, string cString, UInt32 encoding);
+
+        [LibraryImport(CoreFoundation)]
+        [return: MarshalAs(UnmanagedType.U1)]
+        public static partial bool CFStringGetCString(IntPtr theString, IntPtr buffer, long bufferSizes, UInt32 encoding);
+
+        [LibraryImport(CoreFoundation)]
+        public static partial IntPtr CFDataGetBytePtr(IntPtr theData);
+
+        /// <summary>
+        /// Maps a vendor ID to a vendor name.
+        /// </summary>
+        /// <param name="id">Vendor ID read from the registry entry</param>
+        /// <returns>The vendor name for known IDs, otherwise the ID formatted as hexadecimal</returns>
+        static string GetNameFromId(uint id)
+        {
+            return id switch
+            {
+                0x1002 => "AMD",
+                0x106B => "Apple",
+                0x10DE => "NVIDIA",
+                0x13B5 => "ARM",
+                0x8086 => "Intel",
+                _ => $"0x{id:X}"
+            };
+        }
+
+        /// <summary>
+        /// Reads the "vendor-id" property of the IOGPU service and converts it to a vendor name.
+        /// </summary>
+        /// <returns>Vendor name, or the raw ID in hexadecimal when it is not recognised</returns>
+        public static string GetVendor()
+        {
+            // NOTE(review): the objects obtained below are never released in this method — confirm
+            // whether CFRelease/IOObjectRelease calls are required here.
+            var serviceDict = IOServiceMatching("IOGPU");
+            var service = IOServiceGetMatchingService(IntPtr.Zero, serviceDict);
+            var cfString = CFStringCreateWithCString(_kCFAllocatorDefault, "vendor-id", _kCFStringEncodingASCII);
+            var cfProperty = IORegistryEntryCreateCFProperty(service, cfString, _kCFAllocatorDefault, 0);
+
+            // The first four bytes of the property data are interpreted as a 32-bit vendor ID.
+            byte[] buffer = new byte[4];
+            var bufferPtr = CFDataGetBytePtr(cfProperty);
+            Marshal.Copy(bufferPtr, buffer, 0, buffer.Length);
+
+            var vendorId = BitConverter.ToUInt32(buffer);
+
+            return GetNameFromId(vendorId);
+        }
+
+        /// <summary>
+        /// Reads the "model" property of the IOGPU service as a string.
+        /// </summary>
+        /// <returns>The model name, or an empty string when the property could not be converted</returns>
+        public static string GetModel()
+        {
+            // NOTE(review): the objects obtained below are never released in this method — confirm
+            // whether CFRelease/IOObjectRelease calls are required here.
+            var serviceDict = IOServiceMatching("IOGPU");
+            var service = IOServiceGetMatchingService(IntPtr.Zero, serviceDict);
+            var cfString = CFStringCreateWithCString(_kCFAllocatorDefault, "model", _kCFStringEncodingASCII);
+            var cfProperty = IORegistryEntryCreateCFProperty(service, cfString, _kCFAllocatorDefault, 0);
+
+            IntPtr bufferPtr = Marshal.AllocHGlobal(ModelBufferSize);
+
+            try
+            {
+                if (CFStringGetCString(cfProperty, bufferPtr, ModelBufferSize, _kCFStringEncodingASCII))
+                {
+                    return Marshal.PtrToStringUTF8(bufferPtr);
+                }
+
+                return "";
+            }
+            finally
+            {
+                // Free the native buffer on every path, including when the conversion fails.
+                Marshal.FreeHGlobal(bufferPtr);
+            }
+        }
+    }
+}

+ 143 - 0
src/Ryujinx.Graphics.Metal/HashTableSlim.cs

@@ -0,0 +1,143 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Equality contract whose comparand is passed by reference.
+    /// </summary>
+    /// <typeparam name="T">Type of the value to compare against</typeparam>
+    interface IRefEquatable<T>
+    {
+        /// <summary>
+        /// Compares this instance with <paramref name="other"/>.
+        /// </summary>
+        /// <param name="other">Value to compare against, passed by reference</param>
+        /// <returns>True if the two values are considered equal</returns>
+        bool Equals(ref T other);
+    }
+
+    /// <summary>
+    /// Small hash table with a fixed number of buckets. Each bucket is an array of entries
+    /// that is searched linearly. Keys are passed by reference and compared through
+    /// <see cref="IRefEquatable{T}"/>. <see cref="Add"/> does not check for an existing key.
+    /// </summary>
+    /// <typeparam name="TKey">Key type</typeparam>
+    /// <typeparam name="TValue">Value type</typeparam>
+    class HashTableSlim<TKey, TValue> where TKey : IRefEquatable<TKey>
+    {
+        private const int TotalBuckets = 16; // Must be power of 2
+        private const int TotalBucketsMask = TotalBuckets - 1;
+
+        private struct Entry
+        {
+            public int Hash;
+            public TKey Key;
+            public TValue Value;
+        }
+
+        private struct Bucket
+        {
+            // Number of live entries; Entries may be longer than this after removals.
+            public int Length;
+            public Entry[] Entries;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public readonly Span<Entry> AsSpan()
+            {
+                return Entries == null ? Span<Entry>.Empty : Entries.AsSpan(0, Length);
+            }
+        }
+
+        private readonly Bucket[] _hashTable = new Bucket[TotalBuckets];
+
+        /// <summary>
+        /// Enumerates every key currently stored, bucket by bucket.
+        /// </summary>
+        public IEnumerable<TKey> Keys
+        {
+            get
+            {
+                foreach (Bucket bucket in _hashTable)
+                {
+                    for (int i = 0; i < bucket.Length; i++)
+                    {
+                        yield return bucket.Entries[i].Key;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Enumerates every value currently stored, bucket by bucket.
+        /// </summary>
+        public IEnumerable<TValue> Values
+        {
+            get
+            {
+                foreach (Bucket bucket in _hashTable)
+                {
+                    for (int i = 0; i < bucket.Length; i++)
+                    {
+                        yield return bucket.Entries[i].Value;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Appends a key/value pair to the bucket selected by the key's hash code.
+        /// </summary>
+        /// <param name="key">Key to store</param>
+        /// <param name="value">Value to associate with the key</param>
+        public void Add(ref TKey key, TValue value)
+        {
+            // Hash once and reuse it for both the stored entry and the bucket index.
+            int hashCode = key.GetHashCode();
+
+            var entry = new Entry
+            {
+                Hash = hashCode,
+                Key = key,
+                Value = value,
+            };
+
+            int bucketIndex = hashCode & TotalBucketsMask;
+
+            ref var bucket = ref _hashTable[bucketIndex];
+            if (bucket.Entries != null)
+            {
+                int index = bucket.Length;
+
+                if (index >= bucket.Entries.Length)
+                {
+                    Array.Resize(ref bucket.Entries, index + 1);
+                }
+
+                bucket.Entries[index] = entry;
+            }
+            else
+            {
+                bucket.Entries = new[]
+                {
+                    entry,
+                };
+            }
+
+            bucket.Length++;
+        }
+
+        /// <summary>
+        /// Removes the first entry whose key equals <paramref name="key"/>.
+        /// </summary>
+        /// <param name="key">Key to remove</param>
+        /// <returns>True if an entry was removed, false if the key was not found</returns>
+        public bool Remove(ref TKey key)
+        {
+            int hashCode = key.GetHashCode();
+
+            ref var bucket = ref _hashTable[hashCode & TotalBucketsMask];
+            var entries = bucket.AsSpan();
+            for (int i = 0; i < entries.Length; i++)
+            {
+                ref var entry = ref entries[i];
+
+                if (entry.Hash == hashCode && entry.Key.Equals(ref key))
+                {
+                    // Shift the tail down over the removed entry.
+                    entries[(i + 1)..].CopyTo(entries[i..]);
+
+                    // Clear the vacated last slot so it does not keep the old key/value reachable.
+                    entries[^1] = default;
+                    bucket.Length--;
+
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Looks up the value stored for <paramref name="key"/>.
+        /// </summary>
+        /// <param name="key">Key to look up</param>
+        /// <param name="value">The stored value on success, default otherwise</param>
+        /// <returns>True if the key was found</returns>
+        public bool TryGetValue(ref TKey key, out TValue value)
+        {
+            int hashCode = key.GetHashCode();
+
+            var entries = _hashTable[hashCode & TotalBucketsMask].AsSpan();
+            for (int i = 0; i < entries.Length; i++)
+            {
+                ref var entry = ref entries[i];
+
+                if (entry.Hash == hashCode && entry.Key.Equals(ref key))
+                {
+                    value = entry.Value;
+                    return true;
+                }
+            }
+
+            value = default;
+            return false;
+        }
+    }
+}

+ 868 - 0
src/Ryujinx.Graphics.Metal/HelperShader.cs

@@ -0,0 +1,868 @@
+using Ryujinx.Common;
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+using SharpMetal.Metal;
+using System;
+using System.Collections.Generic;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    [SupportedOSPlatform("macos")]
+    class HelperShader : IDisposable
+    {
+        // Divisor used to derive the compute dispatch size of the buffer conversion helpers.
+        private const int ConvertElementsPerWorkgroup = 32 * 100; // Work group size of 32 times 100 elements.
+        // Embedded-resource directory the helper MSL sources are read from (see ReadMsl).
+        private const string ShadersSourcePath = "/Ryujinx.Graphics.Metal/Shaders";
+        private readonly MetalRenderer _renderer;
+        private readonly Pipeline _pipeline;
+        private MTLDevice _device;
+
+        // Samplers bound together with the source texture when blitting.
+        private readonly ISampler _samplerLinear;
+        private readonly ISampler _samplerNearest;
+        // Colour blit programs: one per component type (float / int / uint), plus multisample ("Ms") variants.
+        private readonly IProgram _programColorBlitF;
+        private readonly IProgram _programColorBlitI;
+        private readonly IProgram _programColorBlitU;
+        private readonly IProgram _programColorBlitMsF;
+        private readonly IProgram _programColorBlitMsI;
+        private readonly IProgram _programColorBlitMsU;
+        // Colour clear programs, indexed by colour attachment index, one list per component type.
+        private readonly List<IProgram> _programsColorClearF = new();
+        private readonly List<IProgram> _programsColorClearI = new();
+        private readonly List<IProgram> _programsColorClearU = new();
+        private readonly IProgram _programDepthStencilClear;
+        // Compute programs used by the buffer conversion helpers.
+        private readonly IProgram _programStrideChange;
+        private readonly IProgram _programConvertD32S8ToD24S8;
+        private readonly IProgram _programConvertIndexBuffer;
+        // Depth and stencil blit programs, with multisample ("Ms") variants.
+        private readonly IProgram _programDepthBlit;
+        private readonly IProgram _programDepthBlitMs;
+        private readonly IProgram _programStencilBlit;
+        private readonly IProgram _programStencilBlitMs;
+
+        // Encoder state swapped into the pipeline while a helper operation runs.
+        private readonly EncoderState _helperShaderState = new();
+
+        /// <summary>
+        /// Creates the helper samplers and builds every helper program from the embedded MSL sources.
+        /// </summary>
+        /// <param name="device">Metal device used to create the samplers and programs.</param>
+        /// <param name="renderer">Renderer that owns this helper.</param>
+        /// <param name="pipeline">Pipeline the helper operations are recorded on.</param>
+        public HelperShader(MTLDevice device, MetalRenderer renderer, Pipeline pipeline)
+        {
+            _device = device;
+            _renderer = renderer;
+            _pipeline = pipeline;
+
+            _samplerNearest = new SamplerHolder(renderer, _device, SamplerCreateInfo.Create(MinFilter.Nearest, MagFilter.Nearest));
+            _samplerLinear = new SamplerHolder(renderer, _device, SamplerCreateInfo.Create(MinFilter.Linear, MagFilter.Linear));
+
+            var blitResourceLayout = new ResourceLayoutBuilder()
+                .Add(ResourceStages.Vertex, ResourceType.UniformBuffer, 0)
+                .Add(ResourceStages.Fragment, ResourceType.TextureAndSampler, 0).Build();
+
+            // Each shader template contains a FORMAT placeholder that is specialized per component type.
+            // All placeholder substitutions use ordinal comparison: they are identifiers, not linguistic text.
+            var blitSource = ReadMsl("Blit.metal");
+
+            var blitSourceF = blitSource.Replace("FORMAT", "float", StringComparison.Ordinal);
+            _programColorBlitF = new Program(renderer, device, [
+                new ShaderSource(blitSourceF, ShaderStage.Fragment, TargetLanguage.Msl),
+                new ShaderSource(blitSourceF, ShaderStage.Vertex, TargetLanguage.Msl)
+            ], blitResourceLayout);
+
+            var blitSourceI = blitSource.Replace("FORMAT", "int", StringComparison.Ordinal);
+            _programColorBlitI = new Program(renderer, device, [
+                new ShaderSource(blitSourceI, ShaderStage.Fragment, TargetLanguage.Msl),
+                new ShaderSource(blitSourceI, ShaderStage.Vertex, TargetLanguage.Msl)
+            ], blitResourceLayout);
+
+            var blitSourceU = blitSource.Replace("FORMAT", "uint", StringComparison.Ordinal);
+            _programColorBlitU = new Program(renderer, device, [
+                new ShaderSource(blitSourceU, ShaderStage.Fragment, TargetLanguage.Msl),
+                new ShaderSource(blitSourceU, ShaderStage.Vertex, TargetLanguage.Msl)
+            ], blitResourceLayout);
+
+            var blitMsSource = ReadMsl("BlitMs.metal");
+
+            var blitMsSourceF = blitMsSource.Replace("FORMAT", "float", StringComparison.Ordinal);
+            _programColorBlitMsF = new Program(renderer, device, [
+                new ShaderSource(blitMsSourceF, ShaderStage.Fragment, TargetLanguage.Msl),
+                new ShaderSource(blitMsSourceF, ShaderStage.Vertex, TargetLanguage.Msl)
+            ], blitResourceLayout);
+
+            var blitMsSourceI = blitMsSource.Replace("FORMAT", "int", StringComparison.Ordinal);
+            _programColorBlitMsI = new Program(renderer, device, [
+                new ShaderSource(blitMsSourceI, ShaderStage.Fragment, TargetLanguage.Msl),
+                new ShaderSource(blitMsSourceI, ShaderStage.Vertex, TargetLanguage.Msl)
+            ], blitResourceLayout);
+
+            var blitMsSourceU = blitMsSource.Replace("FORMAT", "uint", StringComparison.Ordinal);
+            _programColorBlitMsU = new Program(renderer, device, [
+                new ShaderSource(blitMsSourceU, ShaderStage.Fragment, TargetLanguage.Msl),
+                new ShaderSource(blitMsSourceU, ShaderStage.Vertex, TargetLanguage.Msl)
+            ], blitResourceLayout);
+
+            var colorClearResourceLayout = new ResourceLayoutBuilder()
+                .Add(ResourceStages.Fragment, ResourceType.UniformBuffer, 0).Build();
+
+            var colorClearSource = ReadMsl("ColorClear.metal");
+
+            // One clear program per colour attachment index, for each component type.
+            for (int i = 0; i < Constants.MaxColorAttachments; i++)
+            {
+                var attachmentIndex = i.ToString();
+                var crntSource = colorClearSource
+                    .Replace("COLOR_ATTACHMENT_INDEX", attachmentIndex, StringComparison.Ordinal)
+                    .Replace("FORMAT", "float", StringComparison.Ordinal);
+                _programsColorClearF.Add(new Program(renderer, device, [
+                    new ShaderSource(crntSource, ShaderStage.Fragment, TargetLanguage.Msl),
+                    new ShaderSource(crntSource, ShaderStage.Vertex, TargetLanguage.Msl)
+                ], colorClearResourceLayout));
+            }
+
+            for (int i = 0; i < Constants.MaxColorAttachments; i++)
+            {
+                var attachmentIndex = i.ToString();
+                var crntSource = colorClearSource
+                    .Replace("COLOR_ATTACHMENT_INDEX", attachmentIndex, StringComparison.Ordinal)
+                    .Replace("FORMAT", "int", StringComparison.Ordinal);
+                _programsColorClearI.Add(new Program(renderer, device, [
+                    new ShaderSource(crntSource, ShaderStage.Fragment, TargetLanguage.Msl),
+                    new ShaderSource(crntSource, ShaderStage.Vertex, TargetLanguage.Msl)
+                ], colorClearResourceLayout));
+            }
+
+            for (int i = 0; i < Constants.MaxColorAttachments; i++)
+            {
+                var attachmentIndex = i.ToString();
+                var crntSource = colorClearSource
+                    .Replace("COLOR_ATTACHMENT_INDEX", attachmentIndex, StringComparison.Ordinal)
+                    .Replace("FORMAT", "uint", StringComparison.Ordinal);
+                _programsColorClearU.Add(new Program(renderer, device, [
+                    new ShaderSource(crntSource, ShaderStage.Fragment, TargetLanguage.Msl),
+                    new ShaderSource(crntSource, ShaderStage.Vertex, TargetLanguage.Msl)
+                ], colorClearResourceLayout));
+            }
+
+            // The depth/stencil clear reuses the colour clear layout (a single fragment uniform buffer).
+            var depthStencilClearSource = ReadMsl("DepthStencilClear.metal");
+            _programDepthStencilClear = new Program(renderer, device, [
+                new ShaderSource(depthStencilClearSource, ShaderStage.Fragment, TargetLanguage.Msl),
+                new ShaderSource(depthStencilClearSource, ShaderStage.Vertex, TargetLanguage.Msl)
+            ], colorClearResourceLayout);
+
+            var strideChangeResourceLayout = new ResourceLayoutBuilder()
+                .Add(ResourceStages.Compute, ResourceType.UniformBuffer, 0)
+                .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 1)
+                .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 2, true).Build();
+
+            var strideChangeSource = ReadMsl("ChangeBufferStride.metal");
+            _programStrideChange = new Program(renderer, device, [
+                new ShaderSource(strideChangeSource, ShaderStage.Compute, TargetLanguage.Msl)
+            ], strideChangeResourceLayout, new ComputeSize(64, 1, 1));
+
+            var convertD32S8ToD24S8ResourceLayout = new ResourceLayoutBuilder()
+                .Add(ResourceStages.Compute, ResourceType.UniformBuffer, 0)
+                .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 1)
+                .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 2, true).Build();
+
+            var convertD32S8ToD24S8Source = ReadMsl("ConvertD32S8ToD24S8.metal");
+            _programConvertD32S8ToD24S8 = new Program(renderer, device, [
+                new ShaderSource(convertD32S8ToD24S8Source, ShaderStage.Compute, TargetLanguage.Msl)
+            ], convertD32S8ToD24S8ResourceLayout, new ComputeSize(64, 1, 1));
+
+            var convertIndexBufferLayout = new ResourceLayoutBuilder()
+                .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 1)
+                .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 2, true)
+                .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 3).Build();
+
+            var convertIndexBufferSource = ReadMsl("ConvertIndexBuffer.metal");
+            _programConvertIndexBuffer = new Program(renderer, device, [
+                new ShaderSource(convertIndexBufferSource, ShaderStage.Compute, TargetLanguage.Msl)
+            ], convertIndexBufferLayout, new ComputeSize(16, 1, 1));
+
+            // Depth and stencil blits pair their own fragment source with the float colour blit vertex source.
+            var depthBlitSource = ReadMsl("DepthBlit.metal");
+            _programDepthBlit = new Program(renderer, device, [
+                new ShaderSource(depthBlitSource, ShaderStage.Fragment, TargetLanguage.Msl),
+                new ShaderSource(blitSourceF, ShaderStage.Vertex, TargetLanguage.Msl)
+            ], blitResourceLayout);
+
+            var depthBlitMsSource = ReadMsl("DepthBlitMs.metal");
+            _programDepthBlitMs = new Program(renderer, device, [
+                new ShaderSource(depthBlitMsSource, ShaderStage.Fragment, TargetLanguage.Msl),
+                new ShaderSource(blitSourceF, ShaderStage.Vertex, TargetLanguage.Msl)
+            ], blitResourceLayout);
+
+            var stencilBlitSource = ReadMsl("StencilBlit.metal");
+            _programStencilBlit = new Program(renderer, device, [
+                new ShaderSource(stencilBlitSource, ShaderStage.Fragment, TargetLanguage.Msl),
+                new ShaderSource(blitSourceF, ShaderStage.Vertex, TargetLanguage.Msl)
+            ], blitResourceLayout);
+
+            var stencilBlitMsSource = ReadMsl("StencilBlitMs.metal");
+            _programStencilBlitMs = new Program(renderer, device, [
+                new ShaderSource(stencilBlitMsSource, ShaderStage.Fragment, TargetLanguage.Msl),
+                new ShaderSource(blitSourceF, ShaderStage.Vertex, TargetLanguage.Msl)
+            ], blitResourceLayout);
+        }
+
+        /// <summary>
+        /// Reads an embedded MSL shader source and substitutes the binding index placeholders.
+        /// </summary>
+        /// <param name="fileName">Shader file name, relative to <see cref="ShadersSourcePath"/>.</param>
+        /// <returns>The shader source with the index placeholders replaced by the values from <c>Constants</c>.</returns>
+        private static string ReadMsl(string fileName)
+        {
+            string source = EmbeddedResources.ReadAllText($"{ShadersSourcePath}/{fileName}");
+
+#pragma warning disable IDE0055 // Disable formatting
+            source = source.Replace("CONSTANT_BUFFERS_INDEX", $"{Constants.ConstantBuffersIndex}");
+            source = source.Replace("STORAGE_BUFFERS_INDEX", $"{Constants.StorageBuffersIndex}");
+            source = source.Replace("TEXTURES_INDEX", $"{Constants.TexturesIndex}");
+            source = source.Replace("IMAGES_INDEX", $"{Constants.ImagesIndex}");
+#pragma warning restore IDE0055
+
+            return source;
+        }
+
+        /// <summary>
+        /// Blits a region of a colour texture onto a region of another texture by drawing a quad.
+        /// </summary>
+        /// <param name="cbs">Command buffer the temporary uniform buffer is reserved on.</param>
+        /// <param name="src">Texture sampled by the blit.</param>
+        /// <param name="dst">Texture bound as the render target.</param>
+        /// <param name="srcRegion">Source region, in source texels.</param>
+        /// <param name="dstRegion">Destination region; reversed coordinates flip the blit on that axis.</param>
+        /// <param name="linearFilter">True to sample with the linear sampler, false for nearest.</param>
+        /// <param name="clear">Forwarded to <c>SetClearLoadAction</c> before the draw; reset to false afterwards.</param>
+        public unsafe void BlitColor(
+            CommandBufferScoped cbs,
+            Texture src,
+            Texture dst,
+            Extents2D srcRegion,
+            Extents2D dstRegion,
+            bool linearFilter,
+            bool clear = false)
+        {
+            _pipeline.SwapState(_helperShaderState);
+
+            // Depth/stencil destinations are not handled here. Bail out before binding anything or
+            // reserving a uniform buffer that would never be consumed.
+            if (dst.Info.Format.IsDepthOrStencil())
+            {
+                // TODO: Depth & stencil blit!
+                Logger.Warning?.PrintMsg(LogClass.Gpu, "Requested a depth or stencil blit!");
+                _pipeline.SwapState(null);
+                return;
+            }
+
+            const int RegionBufferSize = 16;
+
+            var sampler = linearFilter ? _samplerLinear : _samplerNearest;
+
+            _pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, sampler);
+
+            // Source region converted to normalized texture coordinates: (x1, x2, y1, y2).
+            Span<float> region = stackalloc float[RegionBufferSize / sizeof(float)];
+
+            region[0] = srcRegion.X1 / (float)src.Width;
+            region[1] = srcRegion.X2 / (float)src.Width;
+            region[2] = srcRegion.Y1 / (float)src.Height;
+            region[3] = srcRegion.Y2 / (float)src.Height;
+
+            // A reversed destination axis is expressed by swapping the source coordinates instead,
+            // because the viewport below is always built with a positive size.
+            if (dstRegion.X1 > dstRegion.X2)
+            {
+                (region[0], region[1]) = (region[1], region[0]);
+            }
+
+            if (dstRegion.Y1 > dstRegion.Y2)
+            {
+                (region[2], region[3]) = (region[3], region[2]);
+            }
+
+            using var buffer = _renderer.BufferManager.ReserveOrCreate(cbs, RegionBufferSize);
+            buffer.Holder.SetDataUnchecked<float>(buffer.Offset, region);
+            _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]);
+
+            var rect = new Rectangle<float>(
+                MathF.Min(dstRegion.X1, dstRegion.X2),
+                MathF.Min(dstRegion.Y1, dstRegion.Y2),
+                MathF.Abs(dstRegion.X2 - dstRegion.X1),
+                MathF.Abs(dstRegion.Y2 - dstRegion.Y1));
+
+            Span<Viewport> viewports = stackalloc Viewport[16];
+
+            viewports[0] = new Viewport(
+                rect,
+                ViewportSwizzle.PositiveX,
+                ViewportSwizzle.PositiveY,
+                ViewportSwizzle.PositiveZ,
+                ViewportSwizzle.PositiveW,
+                0f,
+                1f);
+
+            // Pick the program matching the destination component type and the source sample count.
+            bool multisample = src.Info.Target.IsMultisample();
+            var dstFormat = dst.Info.Format;
+
+            IProgram program;
+            string componentName;
+
+            if (dstFormat.IsSint())
+            {
+                componentName = "Int";
+                program = multisample ? _programColorBlitMsI : _programColorBlitI;
+            }
+            else if (dstFormat.IsUint())
+            {
+                componentName = "UInt";
+                program = multisample ? _programColorBlitMsU : _programColorBlitU;
+            }
+            else
+            {
+                componentName = "Float";
+                program = multisample ? _programColorBlitMsF : _programColorBlitF;
+            }
+
+            var debugGroupName = "Blit Color " + (multisample ? "MS " : "") + componentName;
+
+            _pipeline.SetProgram(program);
+
+            Span<Rectangle<int>> scissors = stackalloc Rectangle<int>[16];
+
+            scissors[0] = new Rectangle<int>(0, 0, dst.Width, dst.Height);
+
+            _pipeline.SetRenderTargets([dst], null);
+            _pipeline.SetScissors(scissors);
+
+            _pipeline.SetClearLoadAction(clear);
+
+            _pipeline.SetViewports(viewports);
+            _pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip);
+            _pipeline.Draw(4, 1, 0, 0, debugGroupName);
+
+            // Cleanup
+            if (clear)
+            {
+                _pipeline.SetClearLoadAction(false);
+            }
+
+            // Restore previous state
+            _pipeline.SwapState(null);
+        }
+
+        /// <summary>
+        /// Blits the depth and/or stencil aspect of a texture onto a depth-stencil target.
+        /// </summary>
+        /// <param name="cbs">Command buffer the temporary uniform buffer is reserved on.</param>
+        /// <param name="src">Depth-stencil texture sampled by the blit.</param>
+        /// <param name="dst">Texture bound as the depth-stencil render target.</param>
+        /// <param name="srcRegion">Source region, in source texels.</param>
+        /// <param name="dstRegion">Destination region; reversed coordinates flip the blit on that axis.</param>
+        public unsafe void BlitDepthStencil(
+            CommandBufferScoped cbs,
+            Texture src,
+            Texture dst,
+            Extents2D srcRegion,
+            Extents2D dstRegion)
+        {
+            _pipeline.SwapState(_helperShaderState);
+
+            const int RegionBufferSize = 16;
+
+            float srcWidth = src.Width;
+            float srcHeight = src.Height;
+
+            // Normalized source coordinates, laid out as (x1, x2, y1, y2).
+            Span<float> texCoords = stackalloc float[RegionBufferSize / sizeof(float)];
+
+            texCoords[0] = srcRegion.X1 / srcWidth;
+            texCoords[1] = srcRegion.X2 / srcWidth;
+            texCoords[2] = srcRegion.Y1 / srcHeight;
+            texCoords[3] = srcRegion.Y2 / srcHeight;
+
+            bool flipX = dstRegion.X1 > dstRegion.X2;
+            bool flipY = dstRegion.Y1 > dstRegion.Y2;
+
+            if (flipX)
+            {
+                (texCoords[0], texCoords[1]) = (texCoords[1], texCoords[0]);
+            }
+
+            if (flipY)
+            {
+                (texCoords[2], texCoords[3]) = (texCoords[3], texCoords[2]);
+            }
+
+            using var regionBuffer = _renderer.BufferManager.ReserveOrCreate(cbs, RegionBufferSize);
+            regionBuffer.Holder.SetDataUnchecked<float>(regionBuffer.Offset, texCoords);
+            _pipeline.SetUniformBuffers([new BufferAssignment(0, regionBuffer.Range)]);
+
+            Span<Viewport> viewports = stackalloc Viewport[16];
+
+            viewports[0] = new Viewport(
+                new Rectangle<float>(
+                    MathF.Min(dstRegion.X1, dstRegion.X2),
+                    MathF.Min(dstRegion.Y1, dstRegion.Y2),
+                    MathF.Abs(dstRegion.X2 - dstRegion.X1),
+                    MathF.Abs(dstRegion.Y2 - dstRegion.Y1)),
+                ViewportSwizzle.PositiveX,
+                ViewportSwizzle.PositiveY,
+                ViewportSwizzle.PositiveZ,
+                ViewportSwizzle.PositiveW,
+                0f,
+                1f);
+
+            Span<Rectangle<int>> scissors = stackalloc Rectangle<int>[16];
+
+            scissors[0] = new Rectangle<int>(0, 0, dst.Width, dst.Height);
+
+            _pipeline.SetRenderTargets([], dst);
+            _pipeline.SetScissors(scissors);
+            _pipeline.SetViewports(viewports);
+            _pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip);
+
+            // Formats carrying a depth aspect.
+            if (src.Info.Format is
+                Format.D16Unorm or
+                Format.D32Float or
+                Format.X8UintD24Unorm or
+                Format.D24UnormS8Uint or
+                Format.D32FloatS8Uint or
+                Format.S8UintD24Unorm)
+            {
+                BlitAspect(DepthStencilMode.Depth, isDepth: true);
+            }
+
+            // Formats carrying a stencil aspect.
+            if (src.Info.Format is
+                Format.S8Uint or
+                Format.D24UnormS8Uint or
+                Format.D32FloatS8Uint or
+                Format.S8UintD24Unorm)
+            {
+                BlitAspect(DepthStencilMode.Stencil, isDepth: false);
+            }
+
+            // Restore previous state
+            _pipeline.SwapState(null);
+
+            // Draws one aspect through a view of the source, releasing the view if one had to be created.
+            void BlitAspect(DepthStencilMode mode, bool isDepth)
+            {
+                var aspectView = CreateDepthOrStencilView(src, mode);
+
+                BlitDepthStencilDraw(aspectView, isDepth);
+
+                if (aspectView != src)
+                {
+                    aspectView.Release();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns a texture whose depth-stencil mode is <paramref name="depthStencilMode"/>.
+        /// </summary>
+        /// <param name="depthStencilTexture">Texture to view.</param>
+        /// <param name="depthStencilMode">Requested depth-stencil mode.</param>
+        /// <returns>
+        /// The texture itself when it already uses the requested mode; otherwise a new view with the same
+        /// dimensions and format, the requested mode and an identity (RGBA) swizzle.
+        /// </returns>
+        private static Texture CreateDepthOrStencilView(Texture depthStencilTexture, DepthStencilMode depthStencilMode)
+        {
+            var info = depthStencilTexture.Info;
+
+            if (info.DepthStencilMode == depthStencilMode)
+            {
+                return depthStencilTexture;
+            }
+
+            var viewInfo = new TextureCreateInfo(
+                info.Width,
+                info.Height,
+                info.Depth,
+                info.Levels,
+                info.Samples,
+                info.BlockWidth,
+                info.BlockHeight,
+                info.BytesPerPixel,
+                info.Format,
+                depthStencilMode,
+                info.Target,
+                SwizzleComponent.Red,
+                SwizzleComponent.Green,
+                SwizzleComponent.Blue,
+                SwizzleComponent.Alpha);
+
+            return (Texture)depthStencilTexture.CreateView(viewInfo, 0, 0);
+        }
+
+        /// <summary>
+        /// Binds <paramref name="src"/> with the nearest sampler and draws the depth or stencil blit quad.
+        /// The depth or stencil test enabled for the draw is reset afterwards.
+        /// </summary>
+        /// <param name="src">Texture (or view) to sample.</param>
+        /// <param name="isDepth">True to blit depth, false to blit stencil.</param>
+        private void BlitDepthStencilDraw(Texture src, bool isDepth)
+        {
+            // TODO: Check this https://github.com/Ryujinx/Ryujinx/pull/5003/
+            _pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, _samplerNearest);
+
+            bool multisample = src.Info.Target.IsMultisample();
+
+            if (isDepth)
+            {
+                _pipeline.SetProgram(multisample ? _programDepthBlitMs : _programDepthBlit);
+                _pipeline.SetDepthTest(new DepthTestDescriptor(true, true, CompareOp.Always));
+
+                _pipeline.Draw(4, 1, 0, 0, "Depth Blit");
+
+                _pipeline.SetDepthTest(new DepthTestDescriptor(false, false, CompareOp.Always));
+
+                return;
+            }
+
+            _pipeline.SetProgram(multisample ? _programStencilBlitMs : _programStencilBlit);
+            _pipeline.SetStencilTest(CreateStencilTestDescriptor(true));
+
+            _pipeline.Draw(4, 1, 0, 0, "Stencil Blit");
+
+            _pipeline.SetStencilTest(CreateStencilTestDescriptor(false));
+        }
+
+        /// <summary>
+        /// Draws a region of a texture onto the current render targets using the float colour blit program.
+        /// </summary>
+        /// <param name="src">Texture to sample.</param>
+        /// <param name="srcSampler">Sampler bound together with <paramref name="src"/>.</param>
+        /// <param name="srcRegion">Source region, in source texels.</param>
+        /// <param name="dstRegion">Destination region; reversed coordinates flip the draw on that axis.</param>
+        public unsafe void DrawTexture(
+            ITexture src,
+            ISampler srcSampler,
+            Extents2DF srcRegion,
+            Extents2DF dstRegion)
+        {
+            // Save current state
+            var state = _pipeline.SavePredrawState();
+
+            try
+            {
+                _pipeline.SetFaceCulling(false, Face.Front);
+                _pipeline.SetStencilTest(new StencilTestDescriptor());
+                _pipeline.SetDepthTest(new DepthTestDescriptor());
+
+                const int RegionBufferSize = 16;
+
+                _pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, srcSampler);
+
+                // Source region converted to normalized texture coordinates: (x1, x2, y1, y2).
+                Span<float> region = stackalloc float[RegionBufferSize / sizeof(float)];
+
+                region[0] = srcRegion.X1 / src.Width;
+                region[1] = srcRegion.X2 / src.Width;
+                region[2] = srcRegion.Y1 / src.Height;
+                region[3] = srcRegion.Y2 / src.Height;
+
+                if (dstRegion.X1 > dstRegion.X2)
+                {
+                    (region[0], region[1]) = (region[1], region[0]);
+                }
+
+                if (dstRegion.Y1 > dstRegion.Y2)
+                {
+                    (region[2], region[3]) = (region[3], region[2]);
+                }
+
+                var bufferHandle = _renderer.BufferManager.CreateWithHandle(RegionBufferSize);
+
+                try
+                {
+                    _renderer.BufferManager.SetData<float>(bufferHandle, 0, region);
+                    _pipeline.SetUniformBuffers([new BufferAssignment(0, new BufferRange(bufferHandle, 0, RegionBufferSize))]);
+
+                    Span<Viewport> viewports = stackalloc Viewport[16];
+
+                    var rect = new Rectangle<float>(
+                        MathF.Min(dstRegion.X1, dstRegion.X2),
+                        MathF.Min(dstRegion.Y1, dstRegion.Y2),
+                        MathF.Abs(dstRegion.X2 - dstRegion.X1),
+                        MathF.Abs(dstRegion.Y2 - dstRegion.Y1));
+
+                    viewports[0] = new Viewport(
+                        rect,
+                        ViewportSwizzle.PositiveX,
+                        ViewportSwizzle.PositiveY,
+                        ViewportSwizzle.PositiveZ,
+                        ViewportSwizzle.PositiveW,
+                        0f,
+                        1f);
+
+                    _pipeline.SetProgram(_programColorBlitF);
+                    _pipeline.SetViewports(viewports);
+                    _pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip);
+                    _pipeline.Draw(4, 1, 0, 0, "Draw Texture");
+                }
+                finally
+                {
+                    // Delete the temporary buffer even if recording the draw failed, so the handle is not leaked.
+                    _renderer.BufferManager.Delete(bufferHandle);
+                }
+            }
+            finally
+            {
+                // Restore previous state, even on failure, so the caller's pipeline state is not left modified.
+                _pipeline.RestorePredrawState(state);
+            }
+        }
+
+        /// <summary>
+        /// Widens 8-bit elements to 16-bit elements by running a stride change from 1 byte to 2 bytes.
+        /// </summary>
+        public void ConvertI8ToI16(CommandBufferScoped cbs, BufferHolder src, BufferHolder dst, int srcOffset, int size)
+            => ChangeStride(cbs, src, dst, srcOffset, size, stride: 1, newStride: 2);
+
+        /// <summary>
+        /// Dispatches the stride change compute program to copy elements from <paramref name="src"/>
+        /// into <paramref name="dst"/> with a different element stride.
+        /// </summary>
+        /// <param name="cbs">Command buffer the temporary parameter buffer is reserved on.</param>
+        /// <param name="src">Buffer bound at storage binding 1.</param>
+        /// <param name="dst">Buffer bound at storage binding 2.</param>
+        /// <param name="srcOffset">Source offset passed to the shader.</param>
+        /// <param name="size">Size passed to the shader; also used to derive the dispatch size.</param>
+        /// <param name="stride">Current element stride.</param>
+        /// <param name="newStride">Element stride to convert to.</param>
+        public unsafe void ChangeStride(
+            CommandBufferScoped cbs,
+            BufferHolder src,
+            BufferHolder dst,
+            int srcOffset,
+            int size,
+            int stride,
+            int newStride)
+        {
+            int groupCount = 1 + (size / stride) / ConvertElementsPerWorkgroup;
+
+            var source = src.GetBuffer();
+            var destination = dst.GetBuffer();
+
+            const int ParamsBufferSize = 4 * sizeof(int);
+
+            // Save current state
+            _pipeline.SwapState(_helperShaderState);
+
+            // Uniform layout: stride, new stride, size, source offset.
+            Span<int> parameters = stackalloc int[] { stride, newStride, size, srcOffset };
+
+            using var paramsBuffer = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize);
+            paramsBuffer.Holder.SetDataUnchecked<int>(paramsBuffer.Offset, parameters);
+            _pipeline.SetUniformBuffers([new BufferAssignment(0, paramsBuffer.Range)]);
+
+            Span<Auto<DisposableBuffer>> storageBuffers = new Auto<DisposableBuffer>[] { source, destination };
+            _pipeline.SetStorageBuffers(1, storageBuffers);
+
+            _pipeline.SetProgram(_programStrideChange);
+            _pipeline.DispatchCompute(groupCount, 1, 1, "Change Stride");
+
+            // Restore previous state
+            _pipeline.SwapState(null);
+        }
+
+        /// <summary>
+        /// Dispatches the D32S8 to D24S8 conversion compute program.
+        /// </summary>
+        /// <param name="cbs">Command buffer the temporary parameter buffer is reserved on.</param>
+        /// <param name="src">Buffer bound at storage binding 1.</param>
+        /// <param name="dstBuffer">Buffer bound at storage binding 2.</param>
+        /// <param name="pixelCount">Pixel count passed to the shader; also used to derive the dispatch size.</param>
+        /// <param name="dstOffset">Destination offset passed to the shader.</param>
+        public unsafe void ConvertD32S8ToD24S8(CommandBufferScoped cbs, BufferHolder src, Auto<DisposableBuffer> dstBuffer, int pixelCount, int dstOffset)
+        {
+            // Two 32-bit words per input pixel.
+            int inputSize = pixelCount * 2 * sizeof(int);
+            int groupCount = 1 + inputSize / ConvertElementsPerWorkgroup;
+
+            var source = src.GetBuffer();
+
+            const int ParamsBufferSize = sizeof(int) * 2;
+
+            // Save current state
+            _pipeline.SwapState(_helperShaderState);
+
+            // Uniform layout: pixel count, destination offset.
+            Span<int> parameters = stackalloc int[] { pixelCount, dstOffset };
+
+            using var paramsBuffer = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize);
+            paramsBuffer.Holder.SetDataUnchecked<int>(paramsBuffer.Offset, parameters);
+            _pipeline.SetUniformBuffers([new BufferAssignment(0, paramsBuffer.Range)]);
+
+            Span<Auto<DisposableBuffer>> storageBuffers = new Auto<DisposableBuffer>[] { source, dstBuffer };
+            _pipeline.SetStorageBuffers(1, storageBuffers);
+
+            _pipeline.SetProgram(_programConvertD32S8ToD24S8);
+            _pipeline.DispatchCompute(groupCount, 1, 1, "D32S8 to D24S8 Conversion");
+
+            // Restore previous state
+            _pipeline.SwapState(null);
+        }
+
+        /// <summary>
+        /// Dispatches the index buffer conversion compute program, which rewrites indices following
+        /// <paramref name="pattern"/> and outputs 4-byte indices.
+        /// </summary>
+        /// <param name="cbs">Command buffer the temporary parameter buffer is reserved on.</param>
+        /// <param name="src">Buffer bound at storage binding 1.</param>
+        /// <param name="dst">Buffer bound at storage binding 2.</param>
+        /// <param name="pattern">Index pattern describing the conversion.</param>
+        /// <param name="indexSize">Input index size passed to the shader.</param>
+        /// <param name="srcOffset">Source offset passed to the shader.</param>
+        /// <param name="indexCount">Number of input indices; converted to a primitive count via the pattern.</param>
+        public void ConvertIndexBuffer(
+            CommandBufferScoped cbs,
+            BufferHolder src,
+            BufferHolder dst,
+            IndexBufferPattern pattern,
+            int indexSize,
+            int srcOffset,
+            int indexCount)
+        {
+            // TODO: Support conversion with primitive restart enabled.
+
+            const int OutputIndexSize = 4;
+            const int ParamsBufferSize = 16 * sizeof(int);
+
+            int primitives = pattern.GetPrimitiveCount(indexCount);
+
+            var source = src.GetBuffer();
+            var destination = dst.GetBuffer();
+
+            // Save current state
+            _pipeline.SwapState(_helperShaderState);
+
+            Span<int> parameters = stackalloc int[ParamsBufferSize / sizeof(int)];
+
+            // Scalar parameters occupy the second half of the block (elements 8 to 15).
+            Span<int> scalars = parameters[8..];
+
+            scalars[0] = pattern.PrimitiveVertices;
+            scalars[1] = pattern.PrimitiveVerticesOut;
+            scalars[2] = indexSize;
+            scalars[3] = OutputIndexSize;
+            scalars[4] = pattern.BaseIndex;
+            scalars[5] = pattern.IndexStride;
+            scalars[6] = srcOffset;
+            scalars[7] = primitives;
+
+            // The pattern's offset table is written from element 0 onwards.
+            pattern.OffsetIndex.CopyTo(parameters[..pattern.OffsetIndex.Length]);
+
+            using var paramsBuffer = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize);
+            paramsBuffer.Holder.SetDataUnchecked<int>(paramsBuffer.Offset, parameters);
+
+            Span<Auto<DisposableBuffer>> storageBuffers = new Auto<DisposableBuffer>[] { source, destination };
+            _pipeline.SetStorageBuffers(1, storageBuffers);
+            _pipeline.SetStorageBuffers([new BufferAssignment(3, paramsBuffer.Range)]);
+
+            _pipeline.SetProgram(_programConvertIndexBuffer);
+            _pipeline.DispatchCompute(BitUtils.DivRoundUp(primitives, 16), 1, 1, "Convert Index Buffer");
+
+            // Restore previous state
+            _pipeline.SwapState(null);
+        }
+
+        /// <summary>
+        /// Clears a colour attachment by drawing a quad with the clear program matching the format's component type.
+        /// </summary>
+        /// <param name="index">Colour attachment index to clear.</param>
+        /// <param name="clearColor">Clear colour uploaded to the uniform buffer.</param>
+        /// <param name="componentMask">Colour write mask applied to the attachment at <paramref name="index"/>.</param>
+        /// <param name="dstWidth">Viewport width used for the clear.</param>
+        /// <param name="dstHeight">Viewport height used for the clear.</param>
+        /// <param name="format">Attachment format; selects the int, uint or float clear program.</param>
+        public unsafe void ClearColor(
+            int index,
+            ReadOnlySpan<float> clearColor,
+            uint componentMask,
+            int dstWidth,
+            int dstHeight,
+            Format format)
+        {
+            const int ClearColorBufferSize = 16;
+
+            // Keep original scissor
+            DirtyFlags flagsToClear = DirtyFlags.All & (~DirtyFlags.Scissors);
+
+            // Save current state
+            EncoderState previousState = _pipeline.SwapState(_helperShaderState, flagsToClear, false);
+
+            // Inherit some state without fully recreating render pipeline.
+            RenderTargetCopy savedTargets = _helperShaderState.InheritForClear(previousState, false, index);
+
+            // TODO: Flush
+
+            using var colorBuffer = _renderer.BufferManager.ReserveOrCreate(_pipeline.Cbs, ClearColorBufferSize);
+            colorBuffer.Holder.SetDataUnchecked(colorBuffer.Offset, clearColor);
+            _pipeline.SetUniformBuffers([new BufferAssignment(0, colorBuffer.Range)]);
+
+            Span<Viewport> viewports = stackalloc Viewport[16];
+
+            // TODO: Set exact viewport!
+            viewports[0] = new Viewport(
+                new Rectangle<float>(0, 0, dstWidth, dstHeight),
+                ViewportSwizzle.PositiveX,
+                ViewportSwizzle.PositiveY,
+                ViewportSwizzle.PositiveZ,
+                ViewportSwizzle.PositiveW,
+                0f,
+                1f);
+
+            // Only the entry for the attachment being cleared is set; lower entries stay zero.
+            Span<uint> colorMasks = stackalloc uint[index + 1];
+            colorMasks[index] = componentMask;
+
+            List<IProgram> clearPrograms;
+            string componentName;
+
+            if (format.IsSint())
+            {
+                componentName = "Int";
+                clearPrograms = _programsColorClearI;
+            }
+            else if (format.IsUint())
+            {
+                componentName = "UInt";
+                clearPrograms = _programsColorClearU;
+            }
+            else
+            {
+                componentName = "Float";
+                clearPrograms = _programsColorClearF;
+            }
+
+            _pipeline.SetProgram(clearPrograms[index]);
+
+            _pipeline.SetBlendState(index, new BlendDescriptor());
+            _pipeline.SetFaceCulling(false, Face.Front);
+            _pipeline.SetDepthTest(new DepthTestDescriptor(false, false, CompareOp.Always));
+            _pipeline.SetRenderTargetColorMasks(colorMasks);
+            _pipeline.SetViewports(viewports);
+            _pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip);
+            _pipeline.Draw(4, 1, 0, 0, "Clear Color " + componentName);
+
+            // Restore previous state
+            _pipeline.SwapState(null, flagsToClear, false);
+
+            _helperShaderState.Restore(savedTargets);
+        }
+
+        /// <summary>
+        /// Clears depth and/or stencil by swapping in the helper shader state and issuing a
+        /// 4-vertex triangle strip draw with the depth/stencil clear program, then restoring
+        /// the previously active encoder state.
+        /// </summary>
+        /// <param name="depthValue">Depth value uploaded to uniform buffer binding 0 for the clear program</param>
+        /// <param name="depthMask">Forwarded as the second argument of the depth test descriptor (presumably the depth write enable - confirm against DepthTestDescriptor)</param>
+        /// <param name="stencilValue">Stencil reference value used by the clear draw</param>
+        /// <param name="stencilMask">Stencil write mask; the stencil test is only enabled when this is non-zero</param>
+        /// <param name="dstWidth">Width of the viewport used for the clear draw</param>
+        /// <param name="dstHeight">Height of the viewport used for the clear draw</param>
+        public unsafe void ClearDepthStencil(
+            float depthValue,
+            bool depthMask,
+            int stencilValue,
+            int stencilMask,
+            int dstWidth,
+            int dstHeight)
+        {
+            // Keep original scissor
+            DirtyFlags clearFlags = DirtyFlags.All & (~DirtyFlags.Scissors);
+            // NOTE(review): helperScissors is read here but never used in this method.
+            var helperScissors = _helperShaderState.Scissors;
+
+            // Save current state
+            EncoderState originalState = _pipeline.SwapState(_helperShaderState, clearFlags, false);
+
+            // Inherit some state without fully recreating render pipeline.
+            RenderTargetCopy save = _helperShaderState.InheritForClear(originalState, true);
+
+            // 16 bytes are reserved even though only a single float (the depth value) is written below.
+            const int ClearDepthBufferSize = 16;
+
+            using var buffer = _renderer.BufferManager.ReserveOrCreate(_pipeline.Cbs, ClearDepthBufferSize);
+            buffer.Holder.SetDataUnchecked(buffer.Offset, new ReadOnlySpan<float>(ref depthValue));
+            _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]);
+
+            Span<Viewport> viewports = stackalloc Viewport[1];
+
+            // Single viewport spanning (0, 0) to (dstWidth, dstHeight) with a 0..1 depth range.
+            viewports[0] = new Viewport(
+                new Rectangle<float>(0, 0, dstWidth, dstHeight),
+                ViewportSwizzle.PositiveX,
+                ViewportSwizzle.PositiveY,
+                ViewportSwizzle.PositiveZ,
+                ViewportSwizzle.PositiveW,
+                0f,
+                1f);
+
+            _pipeline.SetProgram(_programDepthStencilClear);
+            _pipeline.SetFaceCulling(false, Face.Front);
+            _pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip);
+            _pipeline.SetViewports(viewports);
+            // CompareOp.Always so the draw is never rejected by the depth comparison.
+            _pipeline.SetDepthTest(new DepthTestDescriptor(true, depthMask, CompareOp.Always));
+            // Stencil is only touched when the caller supplied a non-zero write mask.
+            _pipeline.SetStencilTest(CreateStencilTestDescriptor(stencilMask != 0, stencilValue, 0xFF, stencilMask));
+            _pipeline.Draw(4, 1, 0, 0, "Clear Depth Stencil");
+
+            // Cleanup
+            _pipeline.SetDepthTest(new DepthTestDescriptor(false, false, CompareOp.Always));
+            _pipeline.SetStencilTest(CreateStencilTestDescriptor(false));
+
+            // Restore previous state
+            _pipeline.SwapState(null, clearFlags, false);
+
+            _helperShaderState.Restore(save);
+        }
+
+        /// <summary>
+        /// Builds a stencil test descriptor that always passes and replaces the stencil value,
+        /// using the same settings for both halves of the descriptor
+        /// (presumably front face then back face - confirm against StencilTestDescriptor).
+        /// </summary>
+        /// <param name="enabled">Whether the stencil test is enabled</param>
+        /// <param name="refValue">Stencil reference value</param>
+        /// <param name="compareMask">Stencil compare mask</param>
+        /// <param name="writeMask">Stencil write mask</param>
+        /// <returns>The stencil test descriptor</returns>
+        private static StencilTestDescriptor CreateStencilTestDescriptor(
+            bool enabled,
+            int refValue = 0,
+            int compareMask = 0xff,
+            int writeMask = 0xff)
+        {
+            // Both halves of the descriptor share the same compare function and operation.
+            const CompareOp Func = CompareOp.Always;
+            const StencilOp Op = StencilOp.Replace;
+
+            return new StencilTestDescriptor(
+                enabled,
+                Func, Op, Op, Op, refValue, compareMask, writeMask,
+                Func, Op, Op, Op, refValue, compareMask, writeMask);
+        }
+
+        /// <summary>
+        /// Disposes the blit programs, the per-target clear programs, the depth/stencil clear
+        /// program, the pipeline and the two samplers, in that order.
+        /// </summary>
+        public void Dispose()
+        {
+            // Single-sample blit programs.
+            _programColorBlitF.Dispose();
+            _programColorBlitI.Dispose();
+            _programColorBlitU.Dispose();
+
+            // Multisample blit programs.
+            _programColorBlitMsF.Dispose();
+            _programColorBlitMsI.Dispose();
+            _programColorBlitMsU.Dispose();
+
+            // Color clear programs: float, then unsigned integer, then signed integer.
+            foreach (var clearFloat in _programsColorClearF)
+            {
+                clearFloat.Dispose();
+            }
+
+            foreach (var clearUint in _programsColorClearU)
+            {
+                clearUint.Dispose();
+            }
+
+            foreach (var clearSint in _programsColorClearI)
+            {
+                clearSint.Dispose();
+            }
+
+            _programDepthStencilClear.Dispose();
+
+            _pipeline.Dispose();
+
+            _samplerLinear.Dispose();
+            _samplerNearest.Dispose();
+        }
+    }
+}

+ 121 - 0
src/Ryujinx.Graphics.Metal/IdList.cs

@@ -0,0 +1,121 @@
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// List that hands out 1-based integer IDs for stored references and reuses freed slots.
+    /// ID 0 never refers to an item.
+    /// </summary>
+    /// <typeparam name="T">Reference type of the stored items</typeparam>
+    class IdList<T> where T : class
+    {
+        private readonly List<T> _list;
+
+        // Lowest index from which the search for a free (null) slot starts.
+        private int _freeMin;
+
+        /// <summary>
+        /// Creates an empty ID list.
+        /// </summary>
+        public IdList()
+        {
+            _freeMin = 0;
+            _list = new List<T>();
+        }
+
+        /// <summary>
+        /// Stores a value in the first free slot at or after the free-slot hint, or appends it.
+        /// </summary>
+        /// <param name="value">Value to store</param>
+        /// <returns>The 1-based ID assigned to the value</returns>
+        public int Add(T value)
+        {
+            // IndexOf yields either -1 or an index inside the list, so a sign check is enough.
+            int slot = _list.IndexOf(null, _freeMin);
+
+            if (slot < 0)
+            {
+                // No hole available: append, and move the hint past the new last element.
+                slot = _list.Count;
+                _freeMin = slot + 1;
+
+                _list.Add(value);
+            }
+            else
+            {
+                _list[slot] = value;
+            }
+
+            return slot + 1;
+        }
+
+        /// <summary>
+        /// Frees the slot associated with an ID. IDs outside of the list are ignored.
+        /// </summary>
+        /// <param name="id">1-based ID previously returned by <see cref="Add"/></param>
+        public void Remove(int id)
+        {
+            int index = id - 1;
+            int count = _list.Count;
+
+            if (index < 0 || index >= count)
+            {
+                return;
+            }
+
+            if (index != count - 1)
+            {
+                // Interior slot: leave a hole that a later Add can reuse.
+                _list[index] = null;
+                _freeMin = Math.Min(_freeMin, index);
+
+                return;
+            }
+
+            // Last slot: drop it together with any run of holes directly before it.
+            int trimStart = index;
+
+            while (trimStart > 0 && _list[trimStart - 1] is null)
+            {
+                trimStart--;
+            }
+
+            _list.RemoveRange(trimStart, count - trimStart);
+            _freeMin = Math.Min(_freeMin, trimStart);
+        }
+
+        /// <summary>
+        /// Looks up the value stored for an ID.
+        /// </summary>
+        /// <param name="id">1-based ID to look up</param>
+        /// <param name="value">The stored value, or null when the ID is free or out of range</param>
+        /// <returns>True if a non-null value is stored for the ID, false otherwise</returns>
+        public bool TryGetValue(int id, out T value)
+        {
+            int index = id - 1;
+
+            value = null;
+
+            try
+            {
+                if ((uint)index < (uint)_list.Count)
+                {
+                    value = _list[index];
+                }
+            }
+            catch (Exception ex) when (ex is ArgumentOutOfRangeException or IndexOutOfRangeException)
+            {
+                // Range exceptions from the list access are treated as "not found".
+                value = null;
+            }
+
+            return value != null;
+        }
+
+        /// <summary>
+        /// Removes every item and resets the free-slot hint.
+        /// </summary>
+        public void Clear()
+        {
+            _freeMin = 0;
+            _list.Clear();
+        }
+
+        /// <summary>
+        /// Enumerates the stored (non-null) items in slot order.
+        /// </summary>
+        /// <returns>An enumerator over the occupied slots</returns>
+        public IEnumerator<T> GetEnumerator()
+        {
+            // Indexed loop re-reading Count each step, rather than enumerating the List directly.
+            for (int slot = 0; slot < _list.Count; slot++)
+            {
+                T item = _list[slot];
+
+                if (item != null)
+                {
+                    yield return item;
+                }
+            }
+        }
+    }
+}

+ 74 - 0
src/Ryujinx.Graphics.Metal/ImageArray.cs

@@ -0,0 +1,74 @@
+using Ryujinx.Graphics.GAL;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Array of image bindings backed either by texture references or by buffer textures,
+    /// depending on how it was created. Only the array matching the creation mode is allocated.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    internal class ImageArray : IImageArray
+    {
+        private readonly TextureRef[] _textureRefs;
+        private readonly TextureBuffer[] _bufferTextureRefs;
+
+        private readonly bool _isBuffer;
+        private readonly Pipeline _pipeline;
+
+        /// <summary>
+        /// Creates a new image array.
+        /// </summary>
+        /// <param name="size">Number of entries in the array</param>
+        /// <param name="isBuffer">True to store buffer textures, false to store regular textures</param>
+        /// <param name="pipeline">Pipeline notified whenever the array contents change</param>
+        public ImageArray(int size, bool isBuffer, Pipeline pipeline)
+        {
+            _isBuffer = isBuffer;
+            _pipeline = pipeline;
+
+            if (!isBuffer)
+            {
+                _textureRefs = new TextureRef[size];
+            }
+            else
+            {
+                _bufferTextureRefs = new TextureBuffer[size];
+            }
+        }
+
+        /// <summary>
+        /// Stores a run of images starting at the given index and marks the pipeline images dirty.
+        /// </summary>
+        /// <param name="index">Index of the first entry to overwrite</param>
+        /// <param name="images">Images to store; entries that are neither a buffer texture nor a texture clear the slot</param>
+        public void SetImages(int index, ITexture[] images)
+        {
+            for (int i = 0; i < images.Length; i++)
+            {
+                int slot = index + i;
+
+                switch (images[i])
+                {
+                    case TextureBuffer textureBuffer:
+                        _bufferTextureRefs[slot] = textureBuffer;
+                        break;
+                    case Texture texture:
+                        _textureRefs[slot].Storage = texture;
+                        break;
+                    default:
+                        // Unrecognized or null image: clear the slot in whichever array this instance owns.
+                        if (_isBuffer)
+                        {
+                            _bufferTextureRefs[slot] = null;
+                        }
+                        else
+                        {
+                            _textureRefs[slot].Storage = null;
+                        }
+
+                        break;
+                }
+            }
+
+            _pipeline.DirtyImages();
+        }
+
+        /// <summary>
+        /// Gets the texture reference array (null when the array was created for buffer textures).
+        /// </summary>
+        /// <returns>The texture references</returns>
+        public TextureRef[] GetTextureRefs() => _textureRefs;
+
+        /// <summary>
+        /// Gets the buffer texture array (null when the array was created for regular textures).
+        /// </summary>
+        /// <returns>The buffer textures</returns>
+        public TextureBuffer[] GetBufferTextureRefs() => _bufferTextureRefs;
+
+        /// <summary>
+        /// Nothing to release; this type does nothing on dispose.
+        /// </summary>
+        public void Dispose() { }
+    }
+}

+ 118 - 0
src/Ryujinx.Graphics.Metal/IndexBufferPattern.cs

@@ -0,0 +1,118 @@
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Describes a repeating index pattern and lazily builds a GPU buffer holding the
+    /// pattern expanded for a requested number of primitives. The buffer only ever grows;
+    /// previously generated indices are copied over when it is reallocated.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    internal class IndexBufferPattern : IDisposable
+    {
+        public int PrimitiveVertices { get; }
+        public int PrimitiveVerticesOut { get; }
+        public int BaseIndex { get; }
+        public int[] OffsetIndex { get; }
+        public int IndexStride { get; }
+        public bool RepeatStart { get; }
+
+        private readonly MetalRenderer _renderer;
+
+        // Number of indices (not bytes) currently stored in _repeatingBuffer.
+        private int _currentSize;
+        private BufferHandle _repeatingBuffer;
+
+        /// <summary>
+        /// Creates a new index buffer pattern.
+        /// </summary>
+        /// <param name="renderer">Renderer used to create, fill and delete the pattern buffer</param>
+        /// <param name="primitiveVertices">Stored in <see cref="PrimitiveVertices"/>; not otherwise used by this class</param>
+        /// <param name="primitiveVerticesOut">Number of indices reported per primitive by <see cref="GetRepeatingBuffer"/></param>
+        /// <param name="baseIndex">Index added to every generated entry, and subtracted from the vertex count when counting primitives</param>
+        /// <param name="offsetIndex">Per-primitive offsets; one index is generated for each element</param>
+        /// <param name="indexStride">Amount the base index advances for each primitive</param>
+        /// <param name="repeatStart">When true, the first index of every primitive is 0 instead of using the first offset</param>
+        public IndexBufferPattern(MetalRenderer renderer,
+            int primitiveVertices,
+            int primitiveVerticesOut,
+            int baseIndex,
+            int[] offsetIndex,
+            int indexStride,
+            bool repeatStart)
+        {
+            PrimitiveVertices = primitiveVertices;
+            PrimitiveVerticesOut = primitiveVerticesOut;
+            BaseIndex = baseIndex;
+            OffsetIndex = offsetIndex;
+            IndexStride = indexStride;
+            RepeatStart = repeatStart;
+
+            _renderer = renderer;
+        }
+
+        /// <summary>
+        /// Gets the number of primitives produced for a vertex count, never less than zero.
+        /// </summary>
+        /// <param name="vertexCount">Number of input vertices</param>
+        /// <returns>Number of primitives</returns>
+        public int GetPrimitiveCount(int vertexCount)
+        {
+            return Math.Max(0, (vertexCount - BaseIndex) / IndexStride);
+        }
+
+        /// <summary>
+        /// Gets the number of indices the pattern produces for an input index count.
+        /// </summary>
+        /// <param name="indexCount">Number of input indices</param>
+        /// <returns>Primitive count multiplied by the number of offsets per primitive</returns>
+        public int GetConvertedCount(int indexCount)
+        {
+            int primitiveCount = GetPrimitiveCount(indexCount);
+            return primitiveCount * OffsetIndex.Length;
+        }
+
+        /// <summary>
+        /// Gets a buffer containing the pattern expanded for at least the requested vertex count,
+        /// growing (and replacing) the cached buffer when it is too small.
+        /// </summary>
+        /// <param name="vertexCount">Number of input vertices</param>
+        /// <param name="indexCount">Number of indices to draw: primitive count times <see cref="PrimitiveVerticesOut"/></param>
+        /// <returns>Handle of the buffer holding 32-bit indices</returns>
+        public BufferHandle GetRepeatingBuffer(int vertexCount, out int indexCount)
+        {
+            int primitiveCount = GetPrimitiveCount(vertexCount);
+            indexCount = primitiveCount * PrimitiveVerticesOut;
+
+            int expectedSize = primitiveCount * OffsetIndex.Length;
+
+            if (expectedSize <= _currentSize && _repeatingBuffer != BufferHandle.Null)
+            {
+                return _repeatingBuffer;
+            }
+
+            // Expand the repeating pattern to the number of requested primitives.
+            BufferHandle newBuffer = _renderer.BufferManager.CreateWithHandle(expectedSize * sizeof(int));
+
+            // Copy the old data to the new one.
+            if (_repeatingBuffer != BufferHandle.Null)
+            {
+                _renderer.Pipeline.CopyBuffer(_repeatingBuffer, newBuffer, 0, 0, _currentSize * sizeof(int));
+                _renderer.BufferManager.Delete(_repeatingBuffer);
+            }
+
+            _repeatingBuffer = newBuffer;
+
+            // Add the additional repeats on top.
+            int oldPrimitives = _currentSize / OffsetIndex.Length;
+            int newPrimitives = primitiveCount - oldPrimitives;
+
+            int[] newData = new int[expectedSize - _currentSize];
+
+            int outOffset = 0;
+            int index = oldPrimitives * IndexStride + BaseIndex;
+
+            for (int i = 0; i < newPrimitives; i++)
+            {
+                if (RepeatStart)
+                {
+                    // Used for triangle fan
+                    newData[outOffset++] = 0;
+                }
+
+                for (int j = RepeatStart ? 1 : 0; j < OffsetIndex.Length; j++)
+                {
+                    newData[outOffset++] = index + OffsetIndex[j];
+                }
+
+                index += IndexStride;
+            }
+
+            // Only the newly generated tail is uploaded; the head was copied from the old buffer above.
+            _renderer.SetBufferData(newBuffer, _currentSize * sizeof(int), MemoryMarshal.Cast<int, byte>(newData));
+            _currentSize = expectedSize;
+
+            return newBuffer;
+        }
+
+        /// <summary>
+        /// Deletes the cached pattern buffer, if any, and forgets its size.
+        /// </summary>
+        public void Dispose()
+        {
+            if (_repeatingBuffer != BufferHandle.Null)
+            {
+                _renderer.BufferManager.Delete(_repeatingBuffer);
+                _repeatingBuffer = BufferHandle.Null;
+            }
+
+            // The cached size must not outlive the buffer: GetRepeatingBuffer only generates indices
+            // past _currentSize and relies on copying the rest from the old buffer, which is gone now.
+            _currentSize = 0;
+        }
+    }
+}

+ 103 - 0
src/Ryujinx.Graphics.Metal/IndexBufferState.cs

@@ -0,0 +1,103 @@
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Immutable description of the bound index buffer (handle, byte offset, byte size and index type),
+    /// with helpers that resolve it to a Metal buffer for a draw.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    internal readonly struct IndexBufferState
+    {
+        /// <summary>
+        /// State that refers to no buffer.
+        /// </summary>
+        public static IndexBufferState Null => new IndexBufferState(BufferHandle.Null, 0, 0);
+
+        private readonly int _offset;
+        private readonly int _size;
+        private readonly IndexType _type;
+
+        private readonly BufferHandle _handle;
+
+        /// <summary>
+        /// Creates a new index buffer state.
+        /// </summary>
+        /// <param name="handle">Handle of the buffer holding the indices</param>
+        /// <param name="offset">Offset of the index data inside the buffer</param>
+        /// <param name="size">Size of the index data</param>
+        /// <param name="type">Type of each index</param>
+        public IndexBufferState(BufferHandle handle, int offset, int size, IndexType type = IndexType.UInt)
+        {
+            _type = type;
+            _size = size;
+            _offset = offset;
+            _handle = handle;
+        }
+
+        /// <summary>
+        /// Resolves the index buffer for a direct indexed draw.
+        /// </summary>
+        /// <param name="renderer">Renderer owning the buffer manager</param>
+        /// <param name="cbs">Command buffer the buffer will be used on</param>
+        /// <returns>The Metal buffer, the offset to bind it at and the Metal index type; a zero-pointer buffer when nothing could be resolved</returns>
+        public (MTLBuffer, int, MTLIndexType) GetIndexBuffer(MetalRenderer renderer, CommandBufferScoped cbs)
+        {
+            Auto<DisposableBuffer> holder;
+            int bindOffset;
+            int bindSize;
+            MTLIndexType indexType;
+
+            if (_type != IndexType.UByte)
+            {
+                holder = renderer.BufferManager.GetBuffer(_handle, false, out int bufferSize);
+
+                // An offset at or past the end of the buffer leaves nothing to bind.
+                if (_offset >= bufferSize)
+                {
+                    holder = null;
+                }
+
+                indexType = _type.Convert();
+                bindOffset = _offset;
+                bindSize = _size;
+            }
+            else
+            {
+                // Index type is not supported. Convert to I16.
+                holder = renderer.BufferManager.GetBufferI8ToI16(cbs, _handle, _offset, _size);
+
+                indexType = MTLIndexType.UInt16;
+                bindOffset = 0;
+                bindSize = _size * 2;
+            }
+
+            if (holder == null)
+            {
+                return (new MTLBuffer(IntPtr.Zero), 0, MTLIndexType.UInt16);
+            }
+
+            DisposableBuffer resolved = holder.Get(cbs, bindOffset, bindSize);
+
+            return (resolved.Value, bindOffset, indexType);
+        }
+
+        /// <summary>
+        /// Resolves an index buffer converted through a topology conversion pattern.
+        /// </summary>
+        /// <param name="renderer">Renderer owning the buffer manager</param>
+        /// <param name="cbs">Command buffer the buffer will be used on</param>
+        /// <param name="firstIndex">First index of the draw, in indices</param>
+        /// <param name="indexCount">Number of source indices</param>
+        /// <param name="convertedCount">Number of indices after conversion</param>
+        /// <param name="pattern">Pattern used for the conversion</param>
+        /// <returns>The Metal buffer, offset 0 and the 32-bit index type; a zero-pointer buffer when nothing could be resolved</returns>
+        public (MTLBuffer, int, MTLIndexType) GetConvertedIndexBuffer(
+            MetalRenderer renderer,
+            CommandBufferScoped cbs,
+            int firstIndex,
+            int indexCount,
+            int convertedCount,
+            IndexBufferPattern pattern)
+        {
+            // Convert the index buffer using the given pattern.
+            int stride = GetIndexSize();
+            int sourceOffset = _offset + firstIndex * stride;
+            int sourceSize = indexCount * stride;
+
+            var converted = renderer.BufferManager.GetBufferTopologyConversion(cbs, _handle, sourceOffset, sourceSize, pattern, stride);
+
+            // Converted indices are read back as 4 bytes each (UInt32).
+            int convertedSize = convertedCount * 4;
+
+            if (converted == null)
+            {
+                return (new MTLBuffer(IntPtr.Zero), 0, MTLIndexType.UInt32);
+            }
+
+            DisposableBuffer resolved = converted.Get(cbs, 0, convertedSize);
+
+            return (resolved.Value, 0, MTLIndexType.UInt32);
+        }
+
+        /// <summary>
+        /// Gets the size in bytes of one index of the current type.
+        /// </summary>
+        /// <returns>4 for UInt, 2 for UShort, 1 for anything else</returns>
+        private int GetIndexSize()
+        {
+            switch (_type)
+            {
+                case IndexType.UInt:
+                    return 4;
+                case IndexType.UShort:
+                    return 2;
+                default:
+                    return 1;
+            }
+        }
+    }
+}

+ 309 - 0
src/Ryujinx.Graphics.Metal/MetalRenderer.cs

@@ -0,0 +1,309 @@
+using Ryujinx.Common.Configuration;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader.Translation;
+using SharpMetal.Metal;
+using SharpMetal.QuartzCore;
+using System;
+using System.Collections.Generic;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Metal implementation of <see cref="IRenderer"/>. The device and command queues are created
+    /// in the constructor; everything else is created by <see cref="Initialize"/>.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    public sealed class MetalRenderer : IRenderer
+    {
+        public const int TotalSets = 4;
+
+        private readonly MTLDevice _device;
+        private readonly MTLCommandQueue _queue;
+        private readonly Func<CAMetalLayer> _getMetalLayer;
+
+        private Pipeline _pipeline;
+        private Window _window;
+
+        /// <summary>
+        /// Number of programs created through <see cref="CreateProgram"/>.
+        /// </summary>
+        public uint ProgramCount { get; set; } = 0;
+
+        public event EventHandler<ScreenCaptureImageInfo> ScreenCaptured;
+        public bool PreferThreading => true;
+        public IPipeline Pipeline => _pipeline;
+        public IWindow Window => _window;
+
+        internal MTLCommandQueue BackgroundQueue { get; private set; }
+        internal HelperShader HelperShader { get; private set; }
+        internal BufferManager BufferManager { get; private set; }
+        internal CommandBufferPool CommandBufferPool { get; private set; }
+        internal BackgroundResources BackgroundResources { get; private set; }
+        internal Action<Action> InterruptAction { get; private set; }
+        internal SyncManager SyncManager { get; private set; }
+
+        internal HashSet<Program> Programs { get; }
+        internal HashSet<SamplerHolder> Samplers { get; }
+
+        /// <summary>
+        /// Creates the renderer, the system default Metal device and its command queues.
+        /// </summary>
+        /// <param name="metalLayer">Callback that provides the layer to present to; invoked from <see cref="Initialize"/></param>
+        /// <exception cref="NotSupportedException">The device does not report Tier 2 argument buffer support</exception>
+        public MetalRenderer(Func<CAMetalLayer> metalLayer)
+        {
+            _device = MTLDevice.CreateSystemDefaultDevice();
+            Programs = new HashSet<Program>();
+            Samplers = new HashSet<SamplerHolder>();
+
+            if (_device.ArgumentBuffersSupport != MTLArgumentBuffersTier.Tier2)
+            {
+                throw new NotSupportedException("Metal backend requires Tier 2 Argument Buffer support.");
+            }
+
+            _queue = _device.NewCommandQueue(CommandBufferPool.MaxCommandBuffers + 1);
+            BackgroundQueue = _device.NewCommandQueue(CommandBufferPool.MaxCommandBuffers);
+
+            _getMetalLayer = metalLayer;
+        }
+
+        /// <summary>
+        /// Configures the Metal layer and creates the command buffer pool, window, pipeline,
+        /// buffer manager, background resources, helper shaders and sync manager.
+        /// </summary>
+        /// <param name="logLevel">Requested debug level (not used by this backend)</param>
+        public void Initialize(GraphicsDebugLevel logLevel)
+        {
+            var layer = _getMetalLayer();
+            layer.Device = _device;
+            layer.FramebufferOnly = false;
+
+            CommandBufferPool = new CommandBufferPool(_queue);
+            _window = new Window(this, layer);
+            _pipeline = new Pipeline(_device, this);
+            BufferManager = new BufferManager(_device, this, _pipeline);
+
+            _pipeline.InitEncoderStateManager(BufferManager);
+
+            BackgroundResources = new BackgroundResources(this);
+            HelperShader = new HelperShader(_device, this, _pipeline);
+            SyncManager = new SyncManager(this);
+        }
+
+        /// <summary>
+        /// Runs an action that may be invoked from a background thread. The action is run
+        /// directly on the calling thread.
+        /// </summary>
+        /// <param name="action">Action to run</param>
+        /// <param name="alwaysBackground">Ignored by this backend</param>
+        public void BackgroundContextAction(Action action, bool alwaysBackground = false)
+        {
+            // GetData methods should be thread safe, so we can call this directly.
+            // Texture copy (scaled) may also happen in here, so that should also be thread safe.
+
+            action();
+        }
+
+        public BufferHandle CreateBuffer(int size, BufferAccess access)
+        {
+            return BufferManager.CreateWithHandle(size);
+        }
+
+        public BufferHandle CreateBuffer(IntPtr pointer, int size)
+        {
+            return BufferManager.Create(pointer, size);
+        }
+
+        /// <summary>
+        /// Sparse buffers are not implemented (and reported as unsupported by <see cref="GetCapabilities"/>).
+        /// </summary>
+        public BufferHandle CreateBufferSparse(ReadOnlySpan<BufferRange> storageBuffers)
+        {
+            throw new NotImplementedException();
+        }
+
+        public IImageArray CreateImageArray(int size, bool isBuffer)
+        {
+            return new ImageArray(size, isBuffer, _pipeline);
+        }
+
+        public IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info)
+        {
+            ProgramCount++;
+            return new Program(this, _device, shaders, info.ResourceLayout, info.ComputeLocalSize);
+        }
+
+        public ISampler CreateSampler(SamplerCreateInfo info)
+        {
+            return new SamplerHolder(this, _device, info);
+        }
+
+        public ITexture CreateTexture(TextureCreateInfo info)
+        {
+            if (info.Target == Target.TextureBuffer)
+            {
+                return new TextureBuffer(_device, this, _pipeline, info);
+            }
+
+            return new Texture(_device, this, _pipeline, info);
+        }
+
+        public ITextureArray CreateTextureArray(int size, bool isBuffer)
+        {
+            return new TextureArray(size, isBuffer, _pipeline);
+        }
+
+        public bool PrepareHostMapping(IntPtr address, ulong size)
+        {
+            // TODO: Metal Host Mapping
+            return false;
+        }
+
+        public void CreateSync(ulong id, bool strict)
+        {
+            SyncManager.Create(id, strict);
+        }
+
+        public void DeleteBuffer(BufferHandle buffer)
+        {
+            BufferManager.Delete(buffer);
+        }
+
+        public PinnedSpan<byte> GetBufferData(BufferHandle buffer, int offset, int size)
+        {
+            return BufferManager.GetData(buffer, offset, size);
+        }
+
+        /// <summary>
+        /// Reports the capabilities of the backend. Most values are hard-coded; only the maximum
+        /// compute shared memory size is queried from the device.
+        /// </summary>
+        /// <returns>The backend capabilities</returns>
+        public Capabilities GetCapabilities()
+        {
+            // TODO: Finalize these values
+            return new Capabilities(
+                api: TargetApi.Metal,
+                vendorName: HardwareInfoTools.GetVendor(),
+                SystemMemoryType.UnifiedMemory,
+                hasFrontFacingBug: false,
+                hasVectorIndexingBug: false,
+                needsFragmentOutputSpecialization: true,
+                reduceShaderPrecision: true,
+                supportsAstcCompression: true,
+                supportsBc123Compression: true,
+                supportsBc45Compression: true,
+                supportsBc67Compression: true,
+                supportsEtc2Compression: true,
+                supports3DTextureCompression: true,
+                supportsBgraFormat: true,
+                supportsR4G4Format: false,
+                supportsR4G4B4A4Format: true,
+                supportsScaledVertexFormats: false,
+                supportsSnormBufferTextureFormat: true,
+                supportsSparseBuffer: false,
+                supports5BitComponentFormat: true,
+                supportsBlendEquationAdvanced: false,
+                supportsFragmentShaderInterlock: true,
+                supportsFragmentShaderOrderingIntel: false,
+                supportsGeometryShader: false,
+                supportsGeometryShaderPassthrough: false,
+                supportsTransformFeedback: false,
+                supportsImageLoadFormatted: false,
+                supportsLayerVertexTessellation: false,
+                supportsMismatchingViewFormat: true,
+                supportsCubemapView: true,
+                supportsNonConstantTextureOffset: false,
+                supportsQuads: false,
+                supportsSeparateSampler: true,
+                supportsShaderBallot: false,
+                supportsShaderBarrierDivergence: false,
+                supportsShaderFloat64: false,
+                supportsTextureGatherOffsets: false,
+                supportsTextureShadowLod: false,
+                supportsVertexStoreAndAtomics: false,
+                supportsViewportIndexVertexTessellation: false,
+                supportsViewportMask: false,
+                supportsViewportSwizzle: false,
+                supportsIndirectParameters: true,
+                supportsDepthClipControl: false,
+                uniformBufferSetIndex: (int)Constants.ConstantBuffersSetIndex,
+                storageBufferSetIndex: (int)Constants.StorageBuffersSetIndex,
+                textureSetIndex: (int)Constants.TexturesSetIndex,
+                imageSetIndex: (int)Constants.ImagesSetIndex,
+                extraSetBaseIndex: TotalSets,
+                maximumExtraSets: (int)Constants.MaximumExtraSets,
+                maximumUniformBuffersPerStage: Constants.MaxUniformBuffersPerStage,
+                maximumStorageBuffersPerStage: Constants.MaxStorageBuffersPerStage,
+                maximumTexturesPerStage: Constants.MaxTexturesPerStage,
+                maximumImagesPerStage: Constants.MaxImagesPerStage,
+                maximumComputeSharedMemorySize: (int)_device.MaxThreadgroupMemoryLength,
+                maximumSupportedAnisotropy: 16,
+                shaderSubgroupSize: 256,
+                storageBufferOffsetAlignment: 16,
+                textureBufferOffsetAlignment: 16,
+                gatherBiasPrecision: 0,
+                maximumGpuMemory: 0
+            );
+        }
+
+        public ulong GetCurrentSync()
+        {
+            return SyncManager.GetCurrent();
+        }
+
+        public HardwareInfo GetHardwareInfo()
+        {
+            return new HardwareInfo(HardwareInfoTools.GetVendor(), HardwareInfoTools.GetModel(), "Apple");
+        }
+
+        /// <summary>
+        /// Loading precompiled program binaries is not implemented.
+        /// </summary>
+        public IProgram LoadProgramBinary(byte[] programBinary, bool hasFragmentShader, ShaderInfo info)
+        {
+            throw new NotImplementedException();
+        }
+
+        public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan<byte> data)
+        {
+            BufferManager.SetData(buffer, offset, data, _pipeline.Cbs);
+        }
+
+        public void UpdateCounters()
+        {
+            // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/creating_a_counter_sample_buffer_to_store_a_gpu_s_counter_data_during_a_pass?language=objc
+        }
+
+        public void PreFrame()
+        {
+            SyncManager.Cleanup();
+        }
+
+        /// <summary>
+        /// Counters are not implemented: the handler is invoked immediately with a placeholder
+        /// result (1 for <see cref="CounterType.SamplesPassed"/>, 0 otherwise).
+        /// </summary>
+        public ICounterEvent ReportCounter(CounterType type, EventHandler<ulong> resultHandler, float divisor, bool hostReserved)
+        {
+            // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/creating_a_counter_sample_buffer_to_store_a_gpu_s_counter_data_during_a_pass?language=objc
+            var counterEvent = new CounterEvent();
+            resultHandler?.Invoke(counterEvent, type == CounterType.SamplesPassed ? (ulong)1 : 0);
+            return counterEvent;
+        }
+
+        public void ResetCounter(CounterType type)
+        {
+            // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/creating_a_counter_sample_buffer_to_store_a_gpu_s_counter_data_during_a_pass?language=objc
+        }
+
+        public void WaitSync(ulong id)
+        {
+            SyncManager.Wait(id);
+        }
+
+        public void FlushAllCommands()
+        {
+            _pipeline.FlushCommandsImpl();
+        }
+
+        public void RegisterFlush()
+        {
+            SyncManager.RegisterFlush();
+
+            // Periodically free unused regions of the staging buffer to avoid doing it all at once.
+            BufferManager.StagingBuffer.FreeCompleted();
+        }
+
+        public void SetInterruptAction(Action<Action> interruptAction)
+        {
+            InterruptAction = interruptAction;
+        }
+
+        public void Screenshot()
+        {
+            // TODO: Screenshots
+        }
+
+        /// <summary>
+        /// Disposes the background resources, every registered program and sampler, the pipeline
+        /// and the window. Safe to call even if <see cref="Initialize"/> never ran.
+        /// </summary>
+        public void Dispose()
+        {
+            // Everything created by Initialize may still be null if initialization never happened.
+            BackgroundResources?.Dispose();
+
+            // Dispose from snapshots: the sets are internal and writable by the items themselves
+            // (presumably they unregister on dispose - not visible here), and mutating a HashSet
+            // while enumerating it throws InvalidOperationException.
+            var programs = new Program[Programs.Count];
+            Programs.CopyTo(programs);
+
+            foreach (var program in programs)
+            {
+                program.Dispose();
+            }
+
+            var samplers = new SamplerHolder[Samplers.Count];
+            Samplers.CopyTo(samplers);
+
+            foreach (var sampler in samplers)
+            {
+                sampler.Dispose();
+            }
+
+            _pipeline?.Dispose();
+            _window?.Dispose();
+        }
+    }
+}

+ 262 - 0
src/Ryujinx.Graphics.Metal/MultiFenceHolder.cs

@@ -0,0 +1,262 @@
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Holder for multiple host GPU fences.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class MultiFenceHolder
+    {
+        private const int BufferUsageTrackingGranularity = 4096;
+
+        private readonly FenceHolder[] _fences;
+        private readonly BufferUsageBitmap _bufferUsageBitmap;
+
+        /// <summary>
+        /// Creates a new instance of the multiple fence holder.
+        /// </summary>
+        /// <remarks>
+        /// No buffer usage bitmap is created by this constructor, so the buffer range
+        /// tracking methods must not be used on instances created this way.
+        /// </remarks>
+        public MultiFenceHolder()
+        {
+            _fences = new FenceHolder[CommandBufferPool.MaxCommandBuffers];
+        }
+
+        /// <summary>
+        /// Creates a new instance of the multiple fence holder, with a given buffer size in mind.
+        /// </summary>
+        /// <param name="size">Size of the buffer</param>
+        public MultiFenceHolder(int size)
+        {
+            _fences = new FenceHolder[CommandBufferPool.MaxCommandBuffers];
+            _bufferUsageBitmap = new BufferUsageBitmap(size, BufferUsageTrackingGranularity);
+        }
+
+        /// <summary>
+        /// Adds read/write buffer usage information to the uses list.
+        /// </summary>
+        /// <param name="cbIndex">Index of the command buffer where the buffer is used</param>
+        /// <param name="offset">Offset of the buffer being used</param>
+        /// <param name="size">Size of the buffer region being used, in bytes</param>
+        /// <param name="write">Whether the access is a write or not</param>
+        public void AddBufferUse(int cbIndex, int offset, int size, bool write)
+        {
+            // Every use is recorded as a read; writes are additionally recorded as a write.
+            _bufferUsageBitmap.Add(cbIndex, offset, size, false);
+
+            if (write)
+            {
+                _bufferUsageBitmap.Add(cbIndex, offset, size, true);
+            }
+        }
+
+        /// <summary>
+        /// Removes all buffer usage information for a given command buffer.
+        /// </summary>
+        /// <param name="cbIndex">Index of the command buffer where the buffer is used</param>
+        public void RemoveBufferUses(int cbIndex)
+        {
+            _bufferUsageBitmap?.Clear(cbIndex);
+        }
+
+        /// <summary>
+        /// Checks if a given range of a buffer is being used by a command buffer still being processed by the GPU.
+        /// </summary>
+        /// <param name="cbIndex">Index of the command buffer where the buffer is used</param>
+        /// <param name="offset">Offset of the buffer being used</param>
+        /// <param name="size">Size of the buffer region being used, in bytes</param>
+        /// <returns>True if in use, false otherwise</returns>
+        public bool IsBufferRangeInUse(int cbIndex, int offset, int size)
+        {
+            return _bufferUsageBitmap.OverlapsWith(cbIndex, offset, size);
+        }
+
+        /// <summary>
+        /// Checks if a given range of a buffer is being used by any command buffer still being processed by the GPU.
+        /// </summary>
+        /// <param name="offset">Offset of the buffer being used</param>
+        /// <param name="size">Size of the buffer region being used, in bytes</param>
+        /// <param name="write">True if only write usages should count</param>
+        /// <returns>True if in use, false otherwise</returns>
+        public bool IsBufferRangeInUse(int offset, int size, bool write)
+        {
+            return _bufferUsageBitmap.OverlapsWith(offset, size, write);
+        }
+
+        /// <summary>
+        /// Adds a fence to the holder.
+        /// </summary>
+        /// <param name="cbIndex">Command buffer index of the command buffer that owns the fence</param>
+        /// <param name="fence">Fence to be added</param>
+        /// <returns>True if the command buffer's previous fence value was null</returns>
+        public bool AddFence(int cbIndex, FenceHolder fence)
+        {
+            ref FenceHolder fenceRef = ref _fences[cbIndex];
+
+            if (fenceRef == null)
+            {
+                fenceRef = fence;
+                return true;
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Removes a fence from the holder.
+        /// </summary>
+        /// <param name="cbIndex">Command buffer index of the command buffer that owns the fence</param>
+        public void RemoveFence(int cbIndex)
+        {
+            _fences[cbIndex] = null;
+        }
+
+        /// <summary>
+        /// Determines if a fence is referenced on the given command buffer.
+        /// </summary>
+        /// <param name="cbIndex">Index of the command buffer to check if it's used</param>
+        /// <returns>True if referenced, false otherwise</returns>
+        public bool HasFence(int cbIndex)
+        {
+            return _fences[cbIndex] != null;
+        }
+
+        /// <summary>
+        /// Wait until all the fences on the holder are signaled.
+        /// </summary>
+        public void WaitForFences()
+        {
+            WaitForFencesImpl(0, 0, true);
+        }
+
+        /// <summary>
+        /// Wait until all the fences on the holder with buffer uses overlapping the specified range are signaled.
+        /// </summary>
+        /// <remarks>
+        /// A <paramref name="size"/> of 0 waits for every fence on the holder.
+        /// </remarks>
+        /// <param name="offset">Start offset of the buffer range</param>
+        /// <param name="size">Size of the buffer range in bytes</param>
+        public void WaitForFences(int offset, int size)
+        {
+            WaitForFencesImpl(offset, size, true);
+        }
+
+        /// <summary>
+        /// Waits for, or polls, all the fences on the holder.
+        /// </summary>
+        /// <param name="indefinite">
+        /// True to block until every fence has completed, false to only check the current status without waiting
+        /// </param>
+        /// <returns>True if all fences were found completed, false otherwise</returns>
+        // TODO: Add a proper timeout!
+        public bool WaitForFences(bool indefinite)
+        {
+            return WaitForFencesImpl(0, 0, indefinite);
+        }
+
+        /// <summary>
+        /// Wait until all the fences on the holder with buffer uses overlapping the specified range are signaled.
+        /// </summary>
+        /// <param name="offset">Start offset of the buffer range</param>
+        /// <param name="size">Size of the buffer range in bytes, or 0 to consider every fence</param>
+        /// <param name="indefinite">Indicates if this should wait indefinitely</param>
+        /// <returns>True if all fences were signaled, false otherwise (only possible when not waiting indefinitely)</returns>
+        private bool WaitForFencesImpl(int offset, int size, bool indefinite)
+        {
+            Span<FenceHolder> fenceHolders = new FenceHolder[CommandBufferPool.MaxCommandBuffers];
+
+            int count = size != 0 ? GetOverlappingFences(fenceHolders, offset, size) : GetFences(fenceHolders);
+            Span<MTLCommandBuffer> fences = stackalloc MTLCommandBuffer[count];
+
+            int fenceCount = 0;
+
+            // Acquire every fence that is still alive, compacting both spans so that the
+            // first fenceCount entries of each refer to the same, successfully acquired fences.
+            for (int i = 0; i < count; i++)
+            {
+                if (fenceHolders[i].TryGet(out MTLCommandBuffer fence))
+                {
+                    fences[fenceCount] = fence;
+
+                    if (fenceCount < i)
+                    {
+                        fenceHolders[fenceCount] = fenceHolders[i];
+                    }
+
+                    fenceCount++;
+                }
+            }
+
+            if (fenceCount == 0)
+            {
+                return true;
+            }
+
+            // Only the first fenceCount slots were filled in; the remaining slots do not hold an
+            // acquired command buffer and must not be waited on or queried.
+            ReadOnlySpan<MTLCommandBuffer> acquiredFences = fences[..fenceCount];
+
+            bool signaled = true;
+
+            if (indefinite)
+            {
+                foreach (var fence in acquiredFences)
+                {
+                    fence.WaitUntilCompleted();
+                }
+            }
+            else
+            {
+                foreach (var fence in acquiredFences)
+                {
+                    if (fence.Status != MTLCommandBufferStatus.Completed)
+                    {
+                        signaled = false;
+                    }
+                }
+            }
+
+            // Release the references taken by TryGet above.
+            for (int i = 0; i < fenceCount; i++)
+            {
+                fenceHolders[i].Put();
+            }
+
+            return signaled;
+        }
+
+        /// <summary>
+        /// Gets fences to wait for.
+        /// </summary>
+        /// <param name="storage">Span to store fences in</param>
+        /// <returns>Number of fences placed in storage</returns>
+        private int GetFences(Span<FenceHolder> storage)
+        {
+            int count = 0;
+
+            for (int i = 0; i < _fences.Length; i++)
+            {
+                var fence = _fences[i];
+
+                if (fence != null)
+                {
+                    storage[count++] = fence;
+                }
+            }
+
+            return count;
+        }
+
+        /// <summary>
+        /// Gets fences to wait for use of a given buffer region.
+        /// </summary>
+        /// <param name="storage">Span to store overlapping fences in</param>
+        /// <param name="offset">Offset of the range</param>
+        /// <param name="size">Size of the range in bytes</param>
+        /// <returns>Number of fences for the specified region placed in storage</returns>
+        private int GetOverlappingFences(Span<FenceHolder> storage, int offset, int size)
+        {
+            int count = 0;
+
+            for (int i = 0; i < _fences.Length; i++)
+            {
+                var fence = _fences[i];
+
+                if (fence != null && _bufferUsageBitmap.OverlapsWith(i, offset, size))
+                {
+                    storage[count++] = fence;
+                }
+            }
+
+            return count;
+        }
+    }
+}

+ 99 - 0
src/Ryujinx.Graphics.Metal/PersistentFlushBuffer.cs

@@ -0,0 +1,99 @@
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Reusable staging buffer used to read buffer or texture data back to the CPU.
+    /// The backing storage is created lazily and replaced by a larger one when needed.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    internal class PersistentFlushBuffer : IDisposable
+    {
+        private readonly MetalRenderer _renderer;
+
+        private BufferHolder _flushStorage;
+
+        /// <summary>
+        /// Creates a new persistent flush buffer. No storage is allocated until data is first requested.
+        /// </summary>
+        /// <param name="renderer">Renderer whose buffer manager creates the flush storage</param>
+        public PersistentFlushBuffer(MetalRenderer renderer)
+        {
+            _renderer = renderer;
+        }
+
+        /// <summary>
+        /// Ensures the flush storage exists and is at least <paramref name="size"/> bytes,
+        /// disposing and recreating it when it is missing or too small.
+        /// </summary>
+        /// <param name="size">Minimum required size in bytes</param>
+        /// <returns>The flush storage to use</returns>
+        private BufferHolder ResizeIfNeeded(int size)
+        {
+            var flushStorage = _flushStorage;
+
+            if (flushStorage == null || size > flushStorage.Size)
+            {
+                flushStorage?.Dispose();
+
+                flushStorage = _renderer.BufferManager.Create(size);
+                _flushStorage = flushStorage;
+            }
+
+            return flushStorage;
+        }
+
+        /// <summary>
+        /// Copies a range of a buffer into the flush storage, waits for the copy, and returns the data.
+        /// </summary>
+        /// <remarks>
+        /// If the source buffer's reference count can no longer be incremented, nothing is copied
+        /// and the returned span exposes whatever the flush storage currently contains.
+        /// </remarks>
+        /// <param name="cbp">Command buffer pool to rent the copy command buffer from</param>
+        /// <param name="buffer">Source buffer</param>
+        /// <param name="offset">Offset of the range in the source buffer</param>
+        /// <param name="size">Size of the range in bytes</param>
+        /// <returns>The first <paramref name="size"/> bytes of the flush storage</returns>
+        public Span<byte> GetBufferData(CommandBufferPool cbp, BufferHolder buffer, int offset, int size)
+        {
+            var flushStorage = ResizeIfNeeded(size);
+            Auto<DisposableBuffer> srcBuffer;
+
+            using (var cbs = cbp.Rent())
+            {
+                srcBuffer = buffer.GetBuffer();
+                var dstBuffer = flushStorage.GetBuffer();
+
+                if (srcBuffer.TryIncrementReferenceCount())
+                {
+                    BufferHolder.Copy(cbs, srcBuffer, dstBuffer, offset, 0, size, registerSrcUsage: false);
+                }
+                else
+                {
+                    // Source buffer is no longer alive, don't copy anything to flush storage.
+                    srcBuffer = null;
+                }
+            }
+
+            flushStorage.WaitForFences();
+
+            // Drop the reference taken above, now that the copy has been waited for.
+            srcBuffer?.DecrementReferenceCount();
+            return flushStorage.GetDataStorage(0, size);
+        }
+
+        /// <summary>
+        /// Copies all layers and levels of a texture into the flush storage, waits for the copy, and returns the data.
+        /// </summary>
+        /// <param name="cbp">Command buffer pool to rent the copy command buffer from</param>
+        /// <param name="view">Texture to read from</param>
+        /// <param name="size">Size of the data in bytes</param>
+        /// <returns>The first <paramref name="size"/> bytes of the flush storage</returns>
+        public Span<byte> GetTextureData(CommandBufferPool cbp, Texture view, int size)
+        {
+            TextureCreateInfo info = view.Info;
+
+            var flushStorage = ResizeIfNeeded(size);
+
+            using (var cbs = cbp.Rent())
+            {
+                var buffer = flushStorage.GetBuffer().Get(cbs).Value;
+                var image = view.GetHandle();
+
+                view.CopyFromOrToBuffer(cbs, buffer, image, size, true, 0, 0, info.GetLayers(), info.Levels, singleSlice: false);
+            }
+
+            flushStorage.WaitForFences();
+            return flushStorage.GetDataStorage(0, size);
+        }
+
+        /// <summary>
+        /// Copies a single layer and level of a texture into the flush storage, waits for the copy, and returns the data.
+        /// </summary>
+        /// <param name="cbp">Command buffer pool to rent the copy command buffer from</param>
+        /// <param name="view">Texture to read from</param>
+        /// <param name="size">Size of the data in bytes</param>
+        /// <param name="layer">Layer to read</param>
+        /// <param name="level">Mipmap level to read</param>
+        /// <returns>The first <paramref name="size"/> bytes of the flush storage</returns>
+        public Span<byte> GetTextureData(CommandBufferPool cbp, Texture view, int size, int layer, int level)
+        {
+            var flushStorage = ResizeIfNeeded(size);
+
+            using (var cbs = cbp.Rent())
+            {
+                var buffer = flushStorage.GetBuffer().Get(cbs).Value;
+                var image = view.GetHandle();
+
+                view.CopyFromOrToBuffer(cbs, buffer, image, size, true, layer, level, 1, 1, singleSlice: true);
+            }
+
+            flushStorage.WaitForFences();
+            return flushStorage.GetDataStorage(0, size);
+        }
+
+        /// <summary>
+        /// Disposes the flush storage, if one was ever allocated. Safe to call more than once.
+        /// </summary>
+        public void Dispose()
+        {
+            // The storage is created lazily, so it may never have been allocated.
+            _flushStorage?.Dispose();
+            _flushStorage = null;
+        }
+    }

+ 877 - 0
src/Ryujinx.Graphics.Metal/Pipeline.cs

@@ -0,0 +1,877 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using SharpMetal.Foundation;
+using SharpMetal.Metal;
+using SharpMetal.QuartzCore;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Kind of command encoder that is currently active on a command buffer.
+    /// </summary>
+    public enum EncoderType
+    {
+        // Blit command encoder.
+        Blit,
+        // Compute command encoder.
+        Compute,
+        // Render command encoder.
+        Render,
+        // No encoder (presumably meaning no pass is currently open; confirm against CommandBufferEncoder).
+        None
+    }
+
+    [SupportedOSPlatform("macos")]
+    class Pipeline : IPipeline, IEncoderFactory, IDisposable
+    {
+        private const ulong MinByteWeightForFlush = 256 * 1024 * 1024; // MiB
+
+        private readonly MTLDevice _device;
+        private readonly MetalRenderer _renderer;
+        private EncoderStateManager _encoderStateManager;
+        private ulong _byteWeight;
+
+        public MTLCommandBuffer CommandBuffer;
+
+        public IndexBufferPattern QuadsToTrisPattern;
+        public IndexBufferPattern TriFanToTrisPattern;
+
+        internal CommandBufferScoped? PreloadCbs { get; private set; }
+        internal CommandBufferScoped Cbs { get; private set; }
+        internal CommandBufferEncoder Encoders => Cbs.Encoders;
+        internal EncoderType CurrentEncoderType => Encoders.CurrentEncoderType;
+
+        /// <summary>
+        /// Creates the pipeline, initializes the renderer's command buffer pool with it,
+        /// and rents the first command buffer.
+        /// </summary>
+        /// <param name="device">Metal device</param>
+        /// <param name="renderer">Owning renderer</param>
+        public Pipeline(MTLDevice device, MetalRenderer renderer)
+        {
+            _device = device;
+            _renderer = renderer;
+
+            renderer.CommandBufferPool.Initialize(this);
+
+            // Rent the initial command buffer and expose its native handle.
+            Cbs = _renderer.CommandBufferPool.Rent();
+            CommandBuffer = Cbs.CommandBuffer;
+        }
+
+        /// <summary>
+        /// Creates the encoder state manager and the index buffer patterns used to draw
+        /// quads and triangle fans as triangles.
+        /// </summary>
+        /// <param name="bufferManager">Buffer manager handed to the encoder state manager</param>
+        internal void InitEncoderStateManager(BufferManager bufferManager)
+        {
+            _encoderStateManager = new EncoderStateManager(_device, bufferManager, this);
+
+            // NOTE(review): the meaning of the numeric arguments is defined by IndexBufferPattern, which is not visible here.
+            QuadsToTrisPattern = new IndexBufferPattern(_renderer, 4, 6, 0, [0, 1, 2, 0, 2, 3], 4, false);
+            TriFanToTrisPattern = new IndexBufferPattern(_renderer, 3, 3, 2, [int.MinValue, -1, 0], 1, true);
+        }
+
+        /// <summary>
+        /// Swaps the encoder state through the encoder state manager, optionally ending
+        /// the current pass first when it is a render pass.
+        /// </summary>
+        /// <param name="state">State handed to the encoder state manager</param>
+        /// <param name="flags">Dirty flags handed to the encoder state manager</param>
+        /// <param name="endRenderPass">True to end the current pass if a render encoder is active</param>
+        /// <returns>The value returned by the encoder state manager's <c>SwapState</c></returns>
+        public EncoderState SwapState(EncoderState state, DirtyFlags flags = DirtyFlags.All, bool endRenderPass = true)
+        {
+            if (endRenderPass)
+            {
+                if (CurrentEncoderType == EncoderType.Render)
+                {
+                    EndCurrentPass();
+                }
+            }
+
+            EncoderState swapped = _encoderStateManager.SwapState(state, flags);
+
+            return swapped;
+        }
+
+        /// <summary>
+        /// Saves the pre-draw state through the encoder state manager.
+        /// </summary>
+        /// <returns>The saved state</returns>
+        public PredrawState SavePredrawState() => _encoderStateManager.SavePredrawState();
+
+        /// <summary>
+        /// Restores a previously saved pre-draw state through the encoder state manager.
+        /// </summary>
+        /// <param name="state">State to restore</param>
+        public void RestorePredrawState(PredrawState state) => _encoderStateManager.RestorePredrawState(state);
+
+        /// <summary>
+        /// Forwards the clear load action setting to the encoder state manager.
+        /// </summary>
+        /// <param name="clear">Value handed to the encoder state manager</param>
+        public void SetClearLoadAction(bool clear) => _encoderStateManager.SetClearLoadAction(clear);
+
+        /// <summary>
+        /// Returns the render encoder of the current command buffer, making sure one is active.
+        /// </summary>
+        /// <param name="forDraw">True to run the render resources prepass and rebind render state for a draw</param>
+        /// <returns>The render command encoder</returns>
+        public MTLRenderCommandEncoder GetOrCreateRenderEncoder(bool forDraw = false)
+        {
+            bool renderEncoderActive = CurrentEncoderType == EncoderType.Render;
+
+            if (!renderEncoderActive)
+            {
+                // The active encoder is not a render encoder: flag all render state as dirty
+                // so that it gets set again.
+                _encoderStateManager.SignalRenderDirty();
+            }
+
+            // The prepass runs before the encoder is ensured.
+            if (forDraw)
+            {
+                _encoderStateManager.RenderResourcesPrepass();
+            }
+
+            var encoder = Encoders.EnsureRenderEncoder();
+
+            if (forDraw)
+            {
+                _encoderStateManager.RebindRenderState(encoder);
+            }
+
+            return encoder;
+        }
+
+        /// <summary>
+        /// Returns the blit encoder of the current command buffer, making sure one is active.
+        /// </summary>
+        /// <returns>The blit command encoder</returns>
+        public MTLBlitCommandEncoder GetOrCreateBlitEncoder() => Encoders.EnsureBlitEncoder();
+
+        /// <summary>
+        /// Returns the compute encoder of the current command buffer, making sure one is active.
+        /// </summary>
+        /// <param name="forDispatch">True to run the compute resources prepass and rebind compute state for a dispatch</param>
+        /// <returns>The compute command encoder</returns>
+        public MTLComputeCommandEncoder GetOrCreateComputeEncoder(bool forDispatch = false)
+        {
+            bool computeEncoderActive = CurrentEncoderType == EncoderType.Compute;
+
+            if (!computeEncoderActive)
+            {
+                // The active encoder is not a compute encoder: flag all compute state as dirty
+                // so that it gets set again.
+                _encoderStateManager.SignalComputeDirty();
+            }
+
+            // The prepass runs before the encoder is ensured.
+            if (forDispatch)
+            {
+                _encoderStateManager.ComputeResourcesPrepass();
+            }
+
+            var encoder = Encoders.EnsureComputeEncoder();
+
+            if (forDispatch)
+            {
+                _encoderStateManager.RebindComputeState(encoder);
+            }
+
+            return encoder;
+        }
+
+        /// <summary>
+        /// Ends the pass that is currently open on the active command buffer's encoders.
+        /// </summary>
+        public void EndCurrentPass() => Encoders.EndCurrentPass();
+
+        /// <summary>
+        /// Creates a render command encoder through the encoder state manager.
+        /// </summary>
+        /// <returns>The new render command encoder</returns>
+        public MTLRenderCommandEncoder CreateRenderCommandEncoder() => _encoderStateManager.CreateRenderCommandEncoder();
+
+        /// <summary>
+        /// Creates a compute command encoder through the encoder state manager.
+        /// </summary>
+        /// <returns>The new compute command encoder</returns>
+        public MTLComputeCommandEncoder CreateComputeCommandEncoder() => _encoderStateManager.CreateComputeCommandEncoder();
+
+        /// <summary>
+        /// Blits a texture onto the drawable's texture, schedules the drawable for
+        /// presentation and flushes the command buffer.
+        /// </summary>
+        /// <param name="drawable">Drawable to present</param>
+        /// <param name="src">Texture to present</param>
+        /// <param name="srcRegion">Region of <paramref name="src"/> to blit from</param>
+        /// <param name="dstRegion">Region of the drawable texture to blit to</param>
+        /// <param name="isLinear">Filtering flag forwarded to the blit helper</param>
+        public void Present(CAMetalDrawable drawable, Texture src, Extents2D srcRegion, Extents2D dstRegion, bool isLinear)
+        {
+            // TODO: Clean this up
+            // Wrap the drawable's texture in a temporary Texture so the blit helper can target it.
+            var drawableInfo = new TextureCreateInfo(
+                (int)drawable.Texture.Width,
+                (int)drawable.Texture.Height,
+                (int)drawable.Texture.Depth,
+                (int)drawable.Texture.MipmapLevelCount,
+                (int)drawable.Texture.SampleCount,
+                0,
+                0,
+                0,
+                Format.B8G8R8A8Unorm,
+                0,
+                Target.Texture2D,
+                SwizzleComponent.Red,
+                SwizzleComponent.Green,
+                SwizzleComponent.Blue,
+                SwizzleComponent.Alpha);
+
+            var target = new Texture(_device, _renderer, this, drawableInfo, drawable.Texture, 0, 0);
+
+            _renderer.HelperShader.BlitColor(Cbs, src, target, srcRegion, dstRegion, isLinear, true);
+
+            // The pass is ended before the drawable is scheduled for presentation.
+            EndCurrentPass();
+
+            Cbs.CommandBuffer.PresentDrawable(drawable);
+
+            FlushCommandsImpl();
+
+            // TODO: Auto flush counting
+            _renderer.SyncManager.GetAndResetWaitTicks();
+
+            // Cleanup
+            target.Dispose();
+        }
+
+        /// <summary>
+        /// Returns the preload command buffer, renting one from the pool if none is held yet.
+        /// </summary>
+        /// <returns>The preload command buffer</returns>
+        public CommandBufferScoped GetPreloadCommandBuffer()
+        {
+            if (!PreloadCbs.HasValue)
+            {
+                PreloadCbs = _renderer.CommandBufferPool.Rent();
+            }
+
+            return PreloadCbs.Value;
+        }
+
+        /// <summary>
+        /// Accounts for the memory held by a disposed resource that is still referenced by the
+        /// current (or preload) command buffer, and flushes once the accumulated amount
+        /// reaches <c>MinByteWeightForFlush</c>.
+        /// </summary>
+        /// <param name="disposedResource">Resource that was disposed</param>
+        /// <param name="byteWeight">Memory weight of the resource, in bytes</param>
+        public void FlushCommandsIfWeightExceeding(IAuto disposedResource, ulong byteWeight)
+        {
+            bool referenced = disposedResource.HasCommandBufferDependency(Cbs);
+
+            if (!referenced && PreloadCbs != null)
+            {
+                referenced = disposedResource.HasCommandBufferDependency(PreloadCbs.Value);
+            }
+
+            if (!referenced)
+            {
+                return;
+            }
+
+            // Memory can only be freed once the command buffer using the resource has executed,
+            // so holding on to the command buffer can keep a large amount of memory alive.
+            // To bound that, the command buffer is force-submitted when the weight of disposed
+            // resources it still references reaches the limit.
+            _byteWeight += byteWeight;
+
+            if (_byteWeight >= MinByteWeightForFlush)
+            {
+                FlushCommandsImpl();
+            }
+        }
+
+        /// <summary>
+        /// Ends the current pass, disposes the preload command buffer if there is one, returns the
+        /// current command buffer to the pool in exchange for a new one, and registers the flush
+        /// with the renderer.
+        /// </summary>
+        public void FlushCommandsImpl()
+        {
+            EndCurrentPass();
+
+            // The disposed-resource weight is tracked per flush.
+            _byteWeight = 0;
+
+            if (PreloadCbs.HasValue)
+            {
+                PreloadCbs.Value.Dispose();
+                PreloadCbs = null;
+            }
+
+            Cbs = _renderer.CommandBufferPool.ReturnAndRent(Cbs);
+            CommandBuffer = Cbs.CommandBuffer;
+
+            _renderer.RegisterFlush();
+        }
+
+        /// <summary>
+        /// Asks the encoder state manager to mark textures as dirty.
+        /// </summary>
+        public void DirtyTextures() => _encoderStateManager.DirtyTextures();
+
+        /// <summary>
+        /// Asks the encoder state manager to mark images as dirty.
+        /// </summary>
+        public void DirtyImages() => _encoderStateManager.DirtyImages();
+
+        /// <summary>
+        /// Blits a region of one texture to a region of another using the helper shaders.
+        /// </summary>
+        /// <param name="src">Source texture</param>
+        /// <param name="dst">Destination texture</param>
+        /// <param name="srcRegion">Source region</param>
+        /// <param name="dstRegion">Destination region</param>
+        /// <param name="isDepthOrStencil">True to use the depth-stencil blit, false to use the color blit</param>
+        /// <param name="linearFilter">Filtering flag; only forwarded to the color blit</param>
+        public void Blit(
+            Texture src,
+            Texture dst,
+            Extents2D srcRegion,
+            Extents2D dstRegion,
+            bool isDepthOrStencil,
+            bool linearFilter)
+        {
+            var helper = _renderer.HelperShader;
+
+            if (!isDepthOrStencil)
+            {
+                helper.BlitColor(Cbs, src, dst, srcRegion, dstRegion, linearFilter);
+                return;
+            }
+
+            helper.BlitDepthStencil(Cbs, src, dst, srcRegion, dstRegion);
+        }
+
+        /// <summary>
+        /// Inserts a memory barrier on the active render or compute encoder.
+        /// Does nothing when any other encoder type is current.
+        /// </summary>
+        public void Barrier()
+        {
+            EncoderType activeType = CurrentEncoderType;
+
+            if (activeType == EncoderType.Render)
+            {
+                MTLBarrierScope scope = MTLBarrierScope.Buffers | MTLBarrierScope.Textures | MTLBarrierScope.RenderTargets;
+                MTLRenderStages stages = MTLRenderStages.RenderStageVertex | MTLRenderStages.RenderStageFragment;
+
+                // The same stage mask is used on both sides of the barrier.
+                Encoders.RenderEncoder.MemoryBarrier(scope, stages, stages);
+            }
+            else if (activeType == EncoderType.Compute)
+            {
+                MTLBarrierScope scope = MTLBarrierScope.Buffers | MTLBarrierScope.Textures | MTLBarrierScope.RenderTargets;
+
+                Encoders.ComputeEncoder.MemoryBarrier(scope);
+            }
+        }
+
+        /// <summary>
+        /// Fills a range of a buffer using the blit encoder.
+        /// </summary>
+        /// <param name="destination">Handle of the buffer to fill</param>
+        /// <param name="offset">Start offset of the range, in bytes</param>
+        /// <param name="size">Size of the range, in bytes</param>
+        /// <param name="value">Fill value; it is cast to a byte, so only its low 8 bits are used</param>
+        public void ClearBuffer(BufferHandle destination, int offset, int size, uint value)
+        {
+            var blitEncoder = GetOrCreateBlitEncoder();
+
+            var bufferHolder = _renderer.BufferManager.GetBuffer(destination, offset, size, true);
+            var nativeBuffer = bufferHolder.Get(Cbs, offset, size, true).Value;
+
+            // Might need a closer look, range's count, lower, and upper bound
+            // must be a multiple of 4
+            var fillRange = new NSRange
+            {
+                location = (ulong)offset,
+                length = (ulong)size
+            };
+
+            blitEncoder.FillBuffer(nativeBuffer, fillRange, (byte)value);
+        }
+
+        /// <summary>
+        /// Clears a color render target using the clear helper shader.
+        /// Logs a warning and does nothing if no render target is bound at the given index.
+        /// </summary>
+        /// <param name="index">Index of the render target</param>
+        /// <param name="layer">Unused by this implementation</param>
+        /// <param name="layerCount">Unused by this implementation</param>
+        /// <param name="componentMask">Component mask forwarded to the clear helper</param>
+        /// <param name="color">Clear color</param>
+        public void ClearRenderTargetColor(int index, int layer, int layerCount, uint componentMask, ColorF color)
+        {
+            var target = _encoderStateManager.RenderTargets[index];
+
+            // TODO: Remove workaround for Wonder which has an invalid texture due to unsupported format
+            if (target == null)
+            {
+                Logger.Warning?.PrintMsg(LogClass.Gpu, "Attempted to clear invalid render target!");
+                return;
+            }
+
+            float[] rgba = [color.Red, color.Green, color.Blue, color.Alpha];
+
+            _renderer.HelperShader.ClearColor(index, rgba, componentMask, target.Width, target.Height, target.Info.Format);
+        }
+
+        /// <summary>
+        /// Clears the bound depth-stencil target using the clear helper shader.
+        /// Does nothing if no depth-stencil target is bound.
+        /// </summary>
+        /// <param name="layer">Unused by this implementation</param>
+        /// <param name="layerCount">Unused by this implementation</param>
+        /// <param name="depthValue">Depth clear value</param>
+        /// <param name="depthMask">Depth mask forwarded to the clear helper</param>
+        /// <param name="stencilValue">Stencil clear value</param>
+        /// <param name="stencilMask">Stencil mask forwarded to the clear helper</param>
+        public void ClearRenderTargetDepthStencil(int layer, int layerCount, float depthValue, bool depthMask, int stencilValue, int stencilMask)
+        {
+            var target = _encoderStateManager.DepthStencil;
+
+            if (target != null)
+            {
+                _renderer.HelperShader.ClearDepthStencil(depthValue, depthMask, stencilValue, stencilMask, target.Width, target.Height);
+            }
+        }
+
+        /// <summary>
+        /// Inserts a command buffer barrier; implemented as a call to <see cref="Barrier"/>.
+        /// </summary>
+        public void CommandBufferBarrier() => Barrier();
+
+        /// <summary>
+        /// Copies a range of bytes from one buffer to another on the current command buffer.
+        /// </summary>
+        /// <param name="src">Handle of the source buffer</param>
+        /// <param name="dst">Handle of the destination buffer</param>
+        /// <param name="srcOffset">Offset into the source buffer, in bytes</param>
+        /// <param name="dstOffset">Offset into the destination buffer, in bytes</param>
+        /// <param name="size">Number of bytes to copy</param>
+        public void CopyBuffer(BufferHandle src, BufferHandle dst, int srcOffset, int dstOffset, int size)
+        {
+            var bufferManager = _renderer.BufferManager;
+
+            // The source is fetched with the write flag off, the destination with it on.
+            var source = bufferManager.GetBuffer(src, srcOffset, size, false);
+            var target = bufferManager.GetBuffer(dst, dstOffset, size, true);
+
+            BufferHolder.Copy(Cbs, source, target, srcOffset, dstOffset, size);
+        }
+
+        /// <summary>
+        /// Pushes a debug group onto the currently active encoder.
+        /// Does nothing when no render, blit or compute encoder is active.
+        /// </summary>
+        /// <param name="name">Name of the debug group</param>
+        public void PushDebugGroup(string name)
+        {
+            var encoder = Encoders.CurrentEncoder;
+
+            if (encoder == null)
+            {
+                return;
+            }
+
+            // Every encoder kind is handled the same way, so a single check replaces the per-type switch.
+            if (Encoders.CurrentEncoderType is not (EncoderType.Render or EncoderType.Blit or EncoderType.Compute))
+            {
+                return;
+            }
+
+            // Only create the native string once it is known that it will be used.
+            var debugGroupName = StringHelper.NSString(name);
+
+            encoder.Value.PushDebugGroup(debugGroupName);
+        }
+
+        /// <summary>
+        /// Pops the most recent debug group from the currently active encoder.
+        /// Does nothing when no render, blit or compute encoder is active.
+        /// </summary>
+        public void PopDebugGroup()
+        {
+            var activeEncoder = Encoders.CurrentEncoder;
+
+            if (activeEncoder == null)
+            {
+                return;
+            }
+
+            // Render, blit and compute encoders are all handled the same way.
+            if (Encoders.CurrentEncoderType is EncoderType.Render or EncoderType.Blit or EncoderType.Compute)
+            {
+                activeEncoder.Value.PopDebugGroup();
+            }
+        }
+
+        /// <summary>
+        /// Dispatches compute work without a debug group.
+        /// </summary>
+        /// <param name="groupsX">Number of threadgroups along X</param>
+        /// <param name="groupsY">Number of threadgroups along Y</param>
+        /// <param name="groupsZ">Number of threadgroups along Z</param>
+        public void DispatchCompute(int groupsX, int groupsY, int groupsZ) =>
+            DispatchCompute(groupsX, groupsY, groupsZ, string.Empty);
+
+        /// <summary>
+        /// Dispatches compute work, optionally wrapped in a debug group.
+        /// </summary>
+        /// <param name="groupsX">Number of threadgroups along X</param>
+        /// <param name="groupsY">Number of threadgroups along Y</param>
+        /// <param name="groupsZ">Number of threadgroups along Z</param>
+        /// <param name="debugGroupName">Debug group name; an empty string disables the debug group</param>
+        public void DispatchCompute(int groupsX, int groupsY, int groupsZ, string debugGroupName)
+        {
+            var encoder = GetOrCreateComputeEncoder(true);
+
+            // Threads per threadgroup come from the current compute state.
+            ComputeSize threadsPerGroup = _encoderStateManager.ComputeLocalSize;
+
+            bool useDebugGroup = debugGroupName != string.Empty;
+
+            if (useDebugGroup)
+            {
+                PushDebugGroup(debugGroupName);
+            }
+
+            var groupCount = new MTLSize { width = (ulong)groupsX, height = (ulong)groupsY, depth = (ulong)groupsZ };
+            var groupSize = new MTLSize { width = (ulong)threadsPerGroup.X, height = (ulong)threadsPerGroup.Y, depth = (ulong)threadsPerGroup.Z };
+
+            encoder.DispatchThreadgroups(groupCount, groupSize);
+
+            if (useDebugGroup)
+            {
+                PopDebugGroup();
+            }
+        }
+
+        /// <summary>
+        /// Issues a non-indexed draw without a debug group.
+        /// </summary>
+        /// <param name="vertexCount">Number of vertices to draw</param>
+        /// <param name="instanceCount">Number of instances to draw</param>
+        /// <param name="firstVertex">Index of the first vertex</param>
+        /// <param name="firstInstance">Index of the first instance</param>
+        public void Draw(int vertexCount, int instanceCount, int firstVertex, int firstInstance) =>
+            Draw(vertexCount, instanceCount, firstVertex, firstInstance, string.Empty);
+
+        /// <summary>
+        /// Issues a non-indexed draw, optionally wrapped in a debug group.
+        /// </summary>
+        /// <remarks>
+        /// Topologies reported by <c>TopologyUnsupported</c> (quads, triangle fans and polygons) are
+        /// drawn as triangles through a generated index buffer. That path uses the instanced,
+        /// base-vertex indexed draw so that <paramref name="instanceCount"/>,
+        /// <paramref name="firstVertex"/> and <paramref name="firstInstance"/> are honored exactly
+        /// like they are on the direct path.
+        /// </remarks>
+        /// <param name="vertexCount">Number of vertices to draw; nothing is drawn when 0</param>
+        /// <param name="instanceCount">Number of instances to draw</param>
+        /// <param name="firstVertex">Index of the first vertex</param>
+        /// <param name="firstInstance">Index of the first instance</param>
+        /// <param name="debugGroupName">Debug group name; null or empty disables the debug group</param>
+        public void Draw(int vertexCount, int instanceCount, int firstVertex, int firstInstance, string debugGroupName)
+        {
+            if (vertexCount == 0)
+            {
+                return;
+            }
+
+            var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert();
+            bool useDebugGroup = !string.IsNullOrEmpty(debugGroupName);
+
+            if (TopologyUnsupported(_encoderStateManager.Topology))
+            {
+                var pattern = GetIndexBufferPattern();
+
+                BufferHandle handle = pattern.GetRepeatingBuffer(vertexCount, out int indexCount);
+                var buffer = _renderer.BufferManager.GetBuffer(handle, false);
+                var mtlBuffer = buffer.Get(Cbs, 0, indexCount * sizeof(int)).Value;
+
+                var renderCommandEncoder = GetOrCreateRenderEncoder(true);
+
+                if (useDebugGroup)
+                {
+                    PushDebugGroup(debugGroupName);
+                }
+
+                // The generated indices are relative to the first vertex of the draw, so the
+                // first vertex is applied as the base vertex of the indexed draw.
+                renderCommandEncoder.DrawIndexedPrimitives(
+                    primitiveType,
+                    (ulong)indexCount,
+                    MTLIndexType.UInt32,
+                    mtlBuffer,
+                    0,
+                    (ulong)instanceCount,
+                    firstVertex,
+                    (ulong)firstInstance);
+
+                if (useDebugGroup)
+                {
+                    PopDebugGroup();
+                }
+            }
+            else
+            {
+                var renderCommandEncoder = GetOrCreateRenderEncoder(true);
+
+                if (useDebugGroup)
+                {
+                    PushDebugGroup(debugGroupName);
+                }
+
+                renderCommandEncoder.DrawPrimitives(
+                    primitiveType,
+                    (ulong)firstVertex,
+                    (ulong)vertexCount,
+                    (ulong)instanceCount,
+                    (ulong)firstInstance);
+
+                if (useDebugGroup)
+                {
+                    PopDebugGroup();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Picks the index buffer pattern that converts the current topology to triangles.
+        /// </summary>
+        /// <returns>The pattern for quads, or the shared pattern for triangle fans and polygons</returns>
+        /// <exception cref="NotSupportedException">The current topology has no conversion pattern</exception>
+        private IndexBufferPattern GetIndexBufferPattern()
+        {
+            PrimitiveTopology topology = _encoderStateManager.Topology;
+
+            switch (topology)
+            {
+                case PrimitiveTopology.Quads:
+                    return QuadsToTrisPattern;
+                case PrimitiveTopology.TriangleFan:
+                case PrimitiveTopology.Polygon:
+                    return TriFanToTrisPattern;
+                default:
+                    throw new NotSupportedException($"Unsupported topology: {topology}");
+            }
+        }
+
+        /// <summary>
+        /// Maps a topology to the one that is actually drawn with: quads, triangle fans and polygons
+        /// become triangles, quad strips become triangle strips, anything else is returned unchanged.
+        /// </summary>
+        /// <param name="topology">Requested topology</param>
+        /// <returns>Topology to draw with</returns>
+        private PrimitiveTopology TopologyRemap(PrimitiveTopology topology)
+        {
+            switch (topology)
+            {
+                case PrimitiveTopology.Quads:
+                case PrimitiveTopology.TriangleFan:
+                case PrimitiveTopology.Polygon:
+                    return PrimitiveTopology.Triangles;
+                case PrimitiveTopology.QuadStrip:
+                    return PrimitiveTopology.TriangleStrip;
+                default:
+                    return topology;
+            }
+        }
+
+        /// <summary>
+        /// Tells whether a topology is one of quads, triangle fans or polygons.
+        /// </summary>
+        /// <param name="topology">Topology to check</param>
+        /// <returns>True for quads, triangle fans and polygons, false otherwise</returns>
+        private bool TopologyUnsupported(PrimitiveTopology topology)
+        {
+            return topology is PrimitiveTopology.Quads
+                or PrimitiveTopology.TriangleFan
+                or PrimitiveTopology.Polygon;
+        }
+
+        /// <summary>
+        /// Issues an indexed draw using the currently bound index buffer.
+        /// </summary>
+        /// <remarks>
+        /// Topologies reported by <c>TopologyUnsupported</c> are drawn through a converted index
+        /// buffer, which is built starting at <paramref name="firstIndex"/>. For every other topology
+        /// the bound index buffer is used directly and <paramref name="firstIndex"/> is applied as a
+        /// byte offset into it.
+        /// </remarks>
+        /// <param name="indexCount">Number of indices to draw; nothing is drawn when 0</param>
+        /// <param name="instanceCount">Number of instances to draw</param>
+        /// <param name="firstIndex">Index of the first index to read from the index buffer</param>
+        /// <param name="firstVertex">Value added to each index before fetching the vertex</param>
+        /// <param name="firstInstance">Index of the first instance</param>
+        public void DrawIndexed(int indexCount, int instanceCount, int firstIndex, int firstVertex, int firstInstance)
+        {
+            if (indexCount == 0)
+            {
+                return;
+            }
+
+            MTLBuffer mtlBuffer;
+            int offset;
+            MTLIndexType type;
+            int finalIndexCount = indexCount;
+
+            var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert();
+
+            if (TopologyUnsupported(_encoderStateManager.Topology))
+            {
+                var pattern = GetIndexBufferPattern();
+                int convertedCount = pattern.GetConvertedCount(indexCount);
+
+                finalIndexCount = convertedCount;
+
+                (mtlBuffer, offset, type) = _encoderStateManager.IndexBuffer.GetConvertedIndexBuffer(_renderer, Cbs, firstIndex, indexCount, convertedCount, pattern);
+            }
+            else
+            {
+                (mtlBuffer, offset, type) = _encoderStateManager.IndexBuffer.GetIndexBuffer(_renderer, Cbs);
+
+                // The draw call has no "first index" parameter, only a byte offset into the
+                // index buffer, so skip to the first index manually.
+                int indexSize = type == MTLIndexType.UInt32 ? sizeof(uint) : sizeof(ushort);
+                offset += firstIndex * indexSize;
+            }
+
+            // A null native pointer means there is no usable index buffer; skip the draw.
+            if (mtlBuffer.NativePtr != IntPtr.Zero)
+            {
+                var renderCommandEncoder = GetOrCreateRenderEncoder(true);
+
+                renderCommandEncoder.DrawIndexedPrimitives(
+                    primitiveType,
+                    (ulong)finalIndexCount,
+                    type,
+                    mtlBuffer,
+                    (ulong)offset,
+                    (ulong)instanceCount,
+                    firstVertex,
+                    (ulong)firstInstance);
+            }
+        }
+
+        /// <summary>
+        /// Issues an indexed indirect draw reading its arguments from the start of the given range.
+        /// </summary>
+        /// <param name="indirectBuffer">Buffer range holding the draw arguments</param>
+        public void DrawIndexedIndirect(BufferRange indirectBuffer) => DrawIndexedIndirectOffset(indirectBuffer);
+
+        /// <summary>
+        /// Issues an indexed indirect draw reading its arguments at an extra offset inside the given range.
+        /// </summary>
+        /// <remarks>
+        /// Unsupported topologies are not converted here; a warning is logged and the draw is
+        /// still issued with the remapped primitive type.
+        /// </remarks>
+        /// <param name="indirectBuffer">Buffer range holding the draw arguments</param>
+        /// <param name="offset">Extra byte offset added to the range's own offset</param>
+        public void DrawIndexedIndirectOffset(BufferRange indirectBuffer, int offset = 0)
+        {
+            // TODO: Reindex unsupported topologies
+            if (TopologyUnsupported(_encoderStateManager.Topology))
+            {
+                Logger.Warning?.Print(LogClass.Gpu, $"Drawing indexed with unsupported topology: {_encoderStateManager.Topology}");
+            }
+
+            var argumentsHolder = _renderer.BufferManager.GetBuffer(indirectBuffer.Handle, indirectBuffer.Offset, indirectBuffer.Size, false);
+            var argumentsBuffer = argumentsHolder.Get(Cbs, indirectBuffer.Offset, indirectBuffer.Size).Value;
+
+            var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert();
+
+            var (indexBuffer, indexOffset, indexType) = _encoderStateManager.IndexBuffer.GetIndexBuffer(_renderer, Cbs);
+
+            // Both the index buffer and the arguments buffer must be valid to draw.
+            if (indexBuffer.NativePtr == IntPtr.Zero || argumentsBuffer.NativePtr == IntPtr.Zero)
+            {
+                return;
+            }
+
+            var encoder = GetOrCreateRenderEncoder(true);
+
+            encoder.DrawIndexedPrimitives(
+                primitiveType,
+                indexType,
+                indexBuffer,
+                (ulong)indexOffset,
+                argumentsBuffer,
+                (ulong)(indirectBuffer.Offset + offset));
+        }
+
+        /// <summary>
+        /// Issues <paramref name="maxDrawCount"/> indexed indirect draws, each one
+        /// <paramref name="stride"/> further into <paramref name="indirectBuffer"/> than the previous.
+        /// </summary>
+        /// <remarks>
+        /// <paramref name="parameterBuffer"/> is not read here, so the maximum draw count is always used.
+        /// </remarks>
+        public void DrawIndexedIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride)
+        {
+            int drawIndex = 0;
+
+            while (drawIndex < maxDrawCount)
+            {
+                DrawIndexedIndirectOffset(indirectBuffer, stride * drawIndex);
+                drawIndex++;
+            }
+        }
+
+        /// <summary>
+        /// Issues one non-indexed indirect draw by delegating to
+        /// <see cref="DrawIndirectOffset"/> with its default extra offset of 0.
+        /// </summary>
+        /// <param name="indirectBuffer">Buffer range that holds the indirect draw arguments.</param>
+        public void DrawIndirect(BufferRange indirectBuffer) => DrawIndirectOffset(indirectBuffer);
+
+        /// <summary>
+        /// Encodes one non-indexed indirect draw.
+        /// The draw is skipped when the indirect buffer has a null native pointer, matching the
+        /// guard used by <see cref="DrawIndexedIndirectOffset"/>.
+        /// </summary>
+        /// <param name="indirectBuffer">Buffer range that holds the indirect draw arguments.</param>
+        /// <param name="offset">Extra offset added to <c>indirectBuffer.Offset</c> when locating the arguments.</param>
+        public void DrawIndirectOffset(BufferRange indirectBuffer, int offset = 0)
+        {
+            if (TopologyUnsupported(_encoderStateManager.Topology))
+            {
+                // TODO: Reindex unsupported topologies
+                Logger.Warning?.Print(LogClass.Gpu, $"Drawing indirect with unsupported topology: {_encoderStateManager.Topology}");
+            }
+
+            var buffer = _renderer.BufferManager
+                .GetBuffer(indirectBuffer.Handle, indirectBuffer.Offset, indirectBuffer.Size, false)
+                .Get(Cbs, indirectBuffer.Offset, indirectBuffer.Size).Value;
+
+            // Do not create an encoder or encode a draw against a buffer with no native object behind it.
+            if (buffer.NativePtr == IntPtr.Zero)
+            {
+                return;
+            }
+
+            var primitiveType = TopologyRemap(_encoderStateManager.Topology).Convert();
+            var renderCommandEncoder = GetOrCreateRenderEncoder(true);
+
+            renderCommandEncoder.DrawPrimitives(
+                primitiveType,
+                buffer,
+                (ulong)(indirectBuffer.Offset + offset));
+        }
+
+        /// <summary>
+        /// Issues <paramref name="maxDrawCount"/> non-indexed indirect draws, each one
+        /// <paramref name="stride"/> further into <paramref name="indirectBuffer"/> than the previous.
+        /// </summary>
+        /// <remarks>
+        /// <paramref name="parameterBuffer"/> is not read here, so the maximum draw count is always used.
+        /// </remarks>
+        public void DrawIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride)
+        {
+            int drawIndex = 0;
+
+            while (drawIndex < maxDrawCount)
+            {
+                DrawIndirectOffset(indirectBuffer, stride * drawIndex);
+                drawIndex++;
+            }
+        }
+
+        /// <summary>
+        /// Draws a texture region by handing all arguments to the renderer's helper shader.
+        /// </summary>
+        public void DrawTexture(ITexture texture, ISampler sampler, Extents2DF srcRegion, Extents2DF dstRegion)
+            => _renderer.HelperShader.DrawTexture(texture, sampler, srcRegion, dstRegion);
+
+        /// <summary>
+        /// Intentionally empty: all three arguments are ignored by this method.
+        /// </summary>
+        public void SetAlphaTest(bool enable, float reference, CompareOp op)
+        {
+            // This is currently handled using shader specialization, as Metal does not support alpha test.
+            // In the future, we may want to use this to write the reference value into the support buffer,
+            // to avoid creating one version of the shader per reference value used.
+        }
+
+        /// <summary>
+        /// Intentionally empty: the advanced blend descriptor is ignored.
+        /// </summary>
+        public void SetBlendState(AdvancedBlendDescriptor blend)
+        {
+            // Metal does not support advanced blend.
+        }
+
+        /// <summary>
+        /// Forwards the blend descriptor for the given index to the encoder state manager.
+        /// </summary>
+        public void SetBlendState(int index, BlendDescriptor blend) => _encoderStateManager.UpdateBlendDescriptors(index, blend);
+
+        /// <summary>
+        /// Updates the depth bias state. When no polygon mode is enabled in
+        /// <paramref name="enables"/>, all three bias values are reset to zero.
+        /// </summary>
+        public void SetDepthBias(PolygonModeMask enables, float factor, float units, float clamp)
+        {
+            if (enables != 0)
+            {
+                // Note the argument order expected by the state manager: units, factor, clamp.
+                _encoderStateManager.UpdateDepthBias(units, factor, clamp);
+                return;
+            }
+
+            _encoderStateManager.UpdateDepthBias(0, 0, 0);
+        }
+
+        /// <summary>
+        /// Forwards the depth clamp flag to the encoder state manager.
+        /// </summary>
+        public void SetDepthClamp(bool clamp) => _encoderStateManager.UpdateDepthClamp(clamp);
+
+        /// <summary>
+        /// Intentionally empty: the requested depth mode is ignored.
+        /// </summary>
+        public void SetDepthMode(DepthMode mode)
+        {
+            // Metal does not support depth clip control.
+        }
+
+        /// <summary>
+        /// Forwards the depth test descriptor to the encoder state manager.
+        /// </summary>
+        public void SetDepthTest(DepthTestDescriptor depthTest) => _encoderStateManager.UpdateDepthState(depthTest);
+
+        /// <summary>
+        /// Forwards the face culling enable flag and face selection to the encoder state manager.
+        /// </summary>
+        public void SetFaceCulling(bool enable, Face face) => _encoderStateManager.UpdateCullMode(enable, face);
+
+        /// <summary>
+        /// Forwards the front-face winding to the encoder state manager.
+        /// </summary>
+        public void SetFrontFace(FrontFace frontFace) => _encoderStateManager.UpdateFrontFace(frontFace);
+
+        /// <summary>
+        /// Forwards the index buffer range and index type to the encoder state manager.
+        /// </summary>
+        public void SetIndexBuffer(BufferRange buffer, IndexType type) => _encoderStateManager.UpdateIndexBuffer(buffer, type);
+
+        /// <summary>
+        /// Binds an image for the given stage and binding.
+        /// Anything that is not a <see cref="TextureBase"/> (including null) is silently ignored.
+        /// </summary>
+        public void SetImage(ShaderStage stage, int binding, ITexture image)
+        {
+            if (image is not TextureBase metalImage)
+            {
+                return;
+            }
+
+            _encoderStateManager.UpdateImage(stage, binding, metalImage);
+        }
+
+        /// <summary>
+        /// Binds an image array for the given stage and binding.
+        /// Anything that is not an <see cref="ImageArray"/> (including null) is silently ignored.
+        /// </summary>
+        public void SetImageArray(ShaderStage stage, int binding, IImageArray array)
+        {
+            if (array is not ImageArray metalArray)
+            {
+                return;
+            }
+
+            _encoderStateManager.UpdateImageArray(stage, binding, metalArray);
+        }
+
+        /// <summary>
+        /// Binds an image array to a separate set index for the given stage.
+        /// Anything that is not an <see cref="ImageArray"/> (including null) is silently ignored.
+        /// </summary>
+        public void SetImageArraySeparate(ShaderStage stage, int setIndex, IImageArray array)
+        {
+            if (array is not ImageArray metalArray)
+            {
+                return;
+            }
+
+            _encoderStateManager.UpdateImageArraySeparate(stage, setIndex, metalArray);
+        }
+
+        /// <summary>
+        /// Intentionally empty: both the line width and the smoothing flag are ignored.
+        /// </summary>
+        public void SetLineParameters(float width, bool smooth)
+        {
+            // Metal does not support wide-lines.
+        }
+
+        /// <summary>
+        /// Forwards the logic-op enable flag and operation to the encoder state manager.
+        /// </summary>
+        public void SetLogicOpState(bool enable, LogicalOp op) => _encoderStateManager.UpdateLogicOpState(enable, op);
+
+        /// <summary>
+        /// Forwards the multisample descriptor to the encoder state manager.
+        /// </summary>
+        public void SetMultisampleState(MultisampleDescriptor multisample) => _encoderStateManager.UpdateMultisampleState(multisample);
+
+        /// <summary>
+        /// Not implemented: the arguments are ignored and a warning is logged on every call.
+        /// </summary>
+        public void SetPatchParameters(int vertices, ReadOnlySpan<float> defaultOuterLevel, ReadOnlySpan<float> defaultInnerLevel)
+            => Logger.Warning?.Print(LogClass.Gpu, "Not Implemented!");
+
+        /// <summary>
+        /// Not implemented: the arguments are ignored and a warning is logged on every call.
+        /// </summary>
+        public void SetPointParameters(float size, bool isProgramPointSize, bool enablePointSprite, Origin origin)
+            => Logger.Warning?.Print(LogClass.Gpu, "Not Implemented!");
+
+        /// <summary>
+        /// Intentionally empty: both polygon modes are ignored.
+        /// </summary>
+        public void SetPolygonMode(PolygonMode frontMode, PolygonMode backMode)
+        {
+            // Metal does not support polygon mode.
+        }
+
+        /// <summary>
+        /// Intentionally empty: neither the enable flag nor the restart index is applied.
+        /// </summary>
+        public void SetPrimitiveRestart(bool enable, int index)
+        {
+            // Always active for LineStrip and TriangleStrip
+            // https://github.com/gpuweb/gpuweb/issues/1220#issuecomment-732483263
+            // https://developer.apple.com/documentation/metal/mtlrendercommandencoder/1515520-drawindexedprimitives
+            // https://stackoverflow.com/questions/70813665/how-to-render-multiple-trianglestrips-using-metal
+
+            // Emulating disabling this is very difficult. It's unlikely for an index buffer to use the largest possible index,
+            // so it's fine nearly all of the time.
+        }
+
+        /// <summary>
+        /// Forwards the primitive topology to the encoder state manager.
+        /// </summary>
+        public void SetPrimitiveTopology(PrimitiveTopology topology) => _encoderStateManager.UpdatePrimitiveTopology(topology);
+
+        /// <summary>
+        /// Forwards the program to the encoder state manager.
+        /// </summary>
+        public void SetProgram(IProgram program) => _encoderStateManager.UpdateProgram(program);
+
+        /// <summary>
+        /// Forwards the rasterizer discard flag to the encoder state manager.
+        /// </summary>
+        public void SetRasterizerDiscard(bool discard) => _encoderStateManager.UpdateRasterizerDiscard(discard);
+
+        /// <summary>
+        /// Forwards the render target color component masks to the encoder state manager.
+        /// </summary>
+        public void SetRenderTargetColorMasks(ReadOnlySpan<uint> componentMask) => _encoderStateManager.UpdateRenderTargetColorMasks(componentMask);
+
+        /// <summary>
+        /// Forwards the color and depth-stencil render targets to the encoder state manager.
+        /// </summary>
+        public void SetRenderTargets(ITexture[] colors, ITexture depthStencil) => _encoderStateManager.UpdateRenderTargets(colors, depthStencil);
+
+        /// <summary>
+        /// Forwards the scissor rectangles to the encoder state manager.
+        /// </summary>
+        public void SetScissors(ReadOnlySpan<Rectangle<int>> regions) => _encoderStateManager.UpdateScissors(regions);
+
+        /// <summary>
+        /// Forwards the stencil test descriptor to the encoder state manager.
+        /// </summary>
+        public void SetStencilTest(StencilTestDescriptor stencilTest) => _encoderStateManager.UpdateStencilState(stencilTest);
+
+        /// <summary>
+        /// Forwards the uniform buffer assignments to the encoder state manager.
+        /// </summary>
+        public void SetUniformBuffers(ReadOnlySpan<BufferAssignment> buffers) => _encoderStateManager.UpdateUniformBuffers(buffers);
+
+        /// <summary>
+        /// Forwards the storage buffer assignments to the encoder state manager.
+        /// </summary>
+        public void SetStorageBuffers(ReadOnlySpan<BufferAssignment> buffers) => _encoderStateManager.UpdateStorageBuffers(buffers);
+
+        /// <summary>
+        /// Forwards already-resolved storage buffers, starting at slot <paramref name="first"/>,
+        /// to the encoder state manager.
+        /// </summary>
+        internal void SetStorageBuffers(int first, ReadOnlySpan<Auto<DisposableBuffer>> buffers)
+            => _encoderStateManager.UpdateStorageBuffers(first, buffers);
+
+        /// <summary>
+        /// Binds a texture and sampler pair for the given stage and binding.
+        /// The call is ignored unless <paramref name="texture"/> is a <see cref="TextureBase"/> and
+        /// <paramref name="sampler"/> is either null or a <see cref="SamplerHolder"/>.
+        /// </summary>
+        public void SetTextureAndSampler(ShaderStage stage, int binding, ITexture texture, ISampler sampler)
+        {
+            if (texture is not TextureBase metalTexture)
+            {
+                return;
+            }
+
+            // A null sampler is accepted and passed through as null.
+            if (sampler != null && sampler is not SamplerHolder)
+            {
+                return;
+            }
+
+            _encoderStateManager.UpdateTextureAndSampler(stage, binding, metalTexture, (SamplerHolder)sampler);
+        }
+
+        /// <summary>
+        /// Binds a texture array for the given stage and binding.
+        /// Anything that is not a <see cref="TextureArray"/> (including null) is silently ignored.
+        /// </summary>
+        public void SetTextureArray(ShaderStage stage, int binding, ITextureArray array)
+        {
+            if (array is not TextureArray metalArray)
+            {
+                return;
+            }
+
+            _encoderStateManager.UpdateTextureArray(stage, binding, metalArray);
+        }
+
+        /// <summary>
+        /// Binds a texture array to a separate set index for the given stage.
+        /// Anything that is not a <see cref="TextureArray"/> (including null) is silently ignored.
+        /// </summary>
+        public void SetTextureArraySeparate(ShaderStage stage, int setIndex, ITextureArray array)
+        {
+            if (array is not TextureArray metalArray)
+            {
+                return;
+            }
+
+            _encoderStateManager.UpdateTextureArraySeparate(stage, setIndex, metalArray);
+        }
+
+        /// <summary>
+        /// Currently empty: both arguments are ignored.
+        /// </summary>
+        public void SetUserClipDistance(int index, bool enableClip)
+        {
+            // TODO. Same as Vulkan
+        }
+
+        /// <summary>
+        /// Forwards the vertex attribute descriptors to the encoder state manager.
+        /// </summary>
+        public void SetVertexAttribs(ReadOnlySpan<VertexAttribDescriptor> vertexAttribs) => _encoderStateManager.UpdateVertexAttribs(vertexAttribs);
+
+        /// <summary>
+        /// Forwards the vertex buffer descriptors to the encoder state manager.
+        /// </summary>
+        public void SetVertexBuffers(ReadOnlySpan<VertexBufferDescriptor> vertexBuffers) => _encoderStateManager.UpdateVertexBuffers(vertexBuffers);
+
+        /// <summary>
+        /// Forwards the viewports to the encoder state manager.
+        /// </summary>
+        public void SetViewports(ReadOnlySpan<Viewport> viewports) => _encoderStateManager.UpdateViewports(viewports);
+
+        /// <summary>
+        /// Inserts a fragment-to-fragment texture memory barrier on the current render encoder.
+        /// Does nothing when the current encoder is not a render encoder.
+        /// </summary>
+        public void TextureBarrier()
+        {
+            if (CurrentEncoderType != EncoderType.Render)
+            {
+                return;
+            }
+
+            Encoders.RenderEncoder.MemoryBarrier(MTLBarrierScope.Textures, MTLRenderStages.RenderStageFragment, MTLRenderStages.RenderStageFragment);
+        }
+
+        /// <summary>
+        /// Tiled variant of the texture barrier; simply delegates to <see cref="TextureBarrier"/>.
+        /// </summary>
+        public void TextureBarrierTiled() => TextureBarrier();
+
+        /// <summary>
+        /// Host conditional rendering against a constant is not implemented; all arguments are ignored.
+        /// </summary>
+        /// <returns>Always <c>false</c>.</returns>
+        // TODO: Implementable via indirect draw commands
+        public bool TryHostConditionalRendering(ICounterEvent value, ulong compare, bool isEqual) => false;
+
+        /// <summary>
+        /// Host conditional rendering against another counter is not implemented; all arguments are ignored.
+        /// </summary>
+        /// <returns>Always <c>false</c>.</returns>
+        // TODO: Implementable via indirect draw commands
+        public bool TryHostConditionalRendering(ICounterEvent value, ICounterEvent compare, bool isEqual) => false;
+
+        /// <summary>
+        /// Currently empty: there is no host conditional rendering state to end.
+        /// </summary>
+        public void EndHostConditionalRendering()
+        {
+            // TODO: Implementable via indirect draw commands
+        }
+
+        /// <summary>
+        /// Intentionally empty: the topology argument is ignored.
+        /// </summary>
+        public void BeginTransformFeedback(PrimitiveTopology topology)
+        {
+            // Metal does not support transform feedback.
+        }
+
+        /// <summary>
+        /// Intentionally empty.
+        /// </summary>
+        public void EndTransformFeedback()
+        {
+            // Metal does not support transform feedback.
+        }
+
+        /// <summary>
+        /// Intentionally empty: the supplied buffer ranges are ignored.
+        /// </summary>
+        public void SetTransformFeedbackBuffers(ReadOnlySpan<BufferRange> buffers)
+        {
+            // Metal does not support transform feedback.
+        }
+
+        /// <summary>
+        /// Ends the current pass and then disposes the encoder state manager.
+        /// </summary>
+        public void Dispose()
+        {
+            // The pass is ended first, before the state manager is torn down.
+            EndCurrentPass();
+            _encoderStateManager.Dispose();
+        }
+    }
+}

+ 286 - 0
src/Ryujinx.Graphics.Metal/Program.cs

@@ -0,0 +1,286 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using SharpMetal.Foundation;
+using SharpMetal.Metal;
+using System;
+using System.Collections.Generic;
+using System.Collections.ObjectModel;
+using System.Runtime.InteropServices;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// A group of Metal shader functions compiled asynchronously from <see cref="ShaderSource"/> entries,
+    /// together with the pipeline states cached for them and their resource binding layout.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class Program : IProgram
+    {
+        // Written from the library compilation callbacks and polled by CheckProgramLink.
+        // Volatile so the blocking wait re-reads the field on every iteration.
+        private volatile ProgramLinkStatus _status;
+        private readonly ShaderSource[] _shaders;
+        private readonly GCHandle[] _handles;
+        // Number of shaders compiled without error. Updated with Interlocked because the
+        // completion callbacks of different shaders may run concurrently.
+        private int _successCount;
+
+        private readonly MetalRenderer _renderer;
+
+        public MTLFunction VertexFunction;
+        public MTLFunction FragmentFunction;
+        public MTLFunction ComputeFunction;
+        public ComputeSize ComputeLocalSize { get; }
+
+        private HashTableSlim<PipelineUid, MTLRenderPipelineState> _graphicsPipelineCache;
+        private MTLComputePipelineState? _computePipelineCache;
+        // NOTE(review): nothing in this class ever sets this to true, so the
+        // "background pipeline compile missed" warning below cannot currently fire.
+        private bool _firstBackgroundUse;
+
+        public ResourceBindingSegment[][] BindingSegments { get; }
+        // Argument buffer sizes for Vertex or Compute stages
+        public int[] ArgumentBufferSizes { get; }
+        // Argument buffer sizes for Fragment stage
+        public int[] FragArgumentBufferSizes { get; }
+
+        /// <summary>
+        /// Registers the program with the renderer and starts one asynchronous library
+        /// compilation per shader. The link status stays Incomplete until the callbacks report back.
+        /// </summary>
+        /// <param name="renderer">Renderer that tracks this program in its <c>Programs</c> collection.</param>
+        /// <param name="device">Device used to compile the shader libraries.</param>
+        /// <param name="shaders">Shader sources to compile, one library per entry.</param>
+        /// <param name="resourceLayout">Layout whose set usages determine the binding segments.</param>
+        /// <param name="computeLocalSize">Local size stored in <see cref="ComputeLocalSize"/>.</param>
+        public Program(
+            MetalRenderer renderer,
+            MTLDevice device,
+            ShaderSource[] shaders,
+            ResourceLayout resourceLayout,
+            ComputeSize computeLocalSize = default)
+        {
+            _renderer = renderer;
+            renderer.Programs.Add(this);
+
+            ComputeLocalSize = computeLocalSize;
+            _shaders = shaders;
+            _handles = new GCHandle[_shaders.Length];
+
+            _status = ProgramLinkStatus.Incomplete;
+
+            for (int i = 0; i < _shaders.Length; i++)
+            {
+                ShaderSource shader = _shaders[i];
+
+                using var compileOptions = new MTLCompileOptions
+                {
+                    PreserveInvariance = true,
+                    LanguageVersion = MTLLanguageVersion.Version31,
+                };
+                // Copy the loop variable so each callback captures its own index.
+                var index = i;
+
+                _handles[i] = device.NewLibrary(StringHelper.NSString(shader.Code), compileOptions, (library, error) => CompilationResultHandler(library, error, index));
+            }
+
+            (BindingSegments, ArgumentBufferSizes, FragArgumentBufferSizes) = BuildBindingSegments(resourceLayout.SetUsages);
+        }
+
+        /// <summary>
+        /// Completion callback for one shader library compilation. Frees the callback handle,
+        /// records a failure or extracts the stage entry point, and marks the program as
+        /// successfully linked once every shader has compiled.
+        /// </summary>
+        /// <param name="library">Compiled library (only used when <paramref name="error"/> is null).</param>
+        /// <param name="error">Compilation error, or a null pointer on success.</param>
+        /// <param name="index">Index of the shader in the source array this callback belongs to.</param>
+        public void CompilationResultHandler(MTLLibrary library, NSError error, int index)
+        {
+            var shader = _shaders[index];
+
+            if (_handles[index].IsAllocated)
+            {
+                _handles[index].Free();
+            }
+
+            if (error != IntPtr.Zero)
+            {
+                Logger.Warning?.PrintMsg(LogClass.Gpu, shader.Code);
+                Logger.Warning?.Print(LogClass.Gpu, $"{shader.Stage} shader linking failed: \n{StringHelper.String(error.LocalizedDescription)}");
+                _status = ProgramLinkStatus.Failure;
+                return;
+            }
+
+            switch (shader.Stage)
+            {
+                case ShaderStage.Compute:
+                    ComputeFunction = library.NewFunction(StringHelper.NSString("kernelMain"));
+                    break;
+                case ShaderStage.Vertex:
+                    VertexFunction = library.NewFunction(StringHelper.NSString("vertexMain"));
+                    break;
+                case ShaderStage.Fragment:
+                    FragmentFunction = library.NewFunction(StringHelper.NSString("fragmentMain"));
+                    break;
+                default:
+                    Logger.Warning?.Print(LogClass.Gpu, $"Cannot handle stage {shader.Stage}!");
+                    break;
+            }
+
+            // Atomic increment: a plain ++ could lose an update when two callbacks overlap,
+            // leaving the status Incomplete forever.
+            int completed = System.Threading.Interlocked.Increment(ref _successCount);
+
+            if (completed >= _shaders.Length && _status != ProgramLinkStatus.Failure)
+            {
+                _status = ProgramLinkStatus.Success;
+            }
+        }
+
+        /// <summary>
+        /// Groups the usages of every set into contiguous binding segments and sums the
+        /// argument buffer sizes per set, separately for vertex/compute and for fragment.
+        /// </summary>
+        /// <returns>The segments per set, the vertex/compute sizes per set, and the fragment sizes per set.</returns>
+        private static (ResourceBindingSegment[][], int[], int[]) BuildBindingSegments(ReadOnlyCollection<ResourceUsageCollection> setUsages)
+        {
+            ResourceBindingSegment[][] segments = new ResourceBindingSegment[setUsages.Count][];
+            int[] argBufferSizes = new int[setUsages.Count];
+            int[] fragArgBufferSizes = new int[setUsages.Count];
+
+            for (int setIndex = 0; setIndex < setUsages.Count; setIndex++)
+            {
+                List<ResourceBindingSegment> currentSegments = new();
+
+                ResourceUsage currentUsage = default;
+                int currentCount = 0;
+
+                for (int index = 0; index < setUsages[setIndex].Usages.Count; index++)
+                {
+                    ResourceUsage usage = setUsages[setIndex].Usages[index];
+
+                    // A usage extends the running segment only when it is the next binding,
+                    // has the same type and stages, and neither side is an array.
+                    if (currentUsage.Binding + currentCount != usage.Binding ||
+                        currentUsage.Type != usage.Type ||
+                        currentUsage.Stages != usage.Stages ||
+                        currentUsage.ArrayLength > 1 ||
+                        usage.ArrayLength > 1)
+                    {
+                        FlushSegment(currentSegments, currentUsage, currentCount, ref argBufferSizes[setIndex], ref fragArgBufferSizes[setIndex]);
+
+                        currentUsage = usage;
+                        currentCount = usage.ArrayLength;
+                    }
+                    else
+                    {
+                        currentCount++;
+                    }
+                }
+
+                // Emit whatever segment was still being accumulated at the end of the set.
+                FlushSegment(currentSegments, currentUsage, currentCount, ref argBufferSizes[setIndex], ref fragArgBufferSizes[setIndex]);
+
+                segments[setIndex] = currentSegments.ToArray();
+            }
+
+            return (segments, argBufferSizes, fragArgBufferSizes);
+        }
+
+        /// <summary>
+        /// Appends the running segment (if it is non-empty) and adds its size to the
+        /// argument buffer totals of the stages it is visible to.
+        /// </summary>
+        private static void FlushSegment(
+            List<ResourceBindingSegment> segments,
+            ResourceUsage usage,
+            int count,
+            ref int argBufferSize,
+            ref int fragArgBufferSize)
+        {
+            if (count == 0)
+            {
+                return;
+            }
+
+            segments.Add(new ResourceBindingSegment(
+                usage.Binding,
+                count,
+                usage.Type,
+                usage.Stages,
+                usage.ArrayLength > 1));
+
+            var size = count * ResourcePointerSize(usage.Type);
+
+            if (usage.Stages.HasFlag(ResourceStages.Fragment))
+            {
+                fragArgBufferSize += size;
+            }
+
+            if (usage.Stages.HasFlag(ResourceStages.Vertex) ||
+                usage.Stages.HasFlag(ResourceStages.Compute))
+            {
+                argBufferSize += size;
+            }
+        }
+
+        /// <summary>
+        /// Number of argument buffer entries one resource of the given type occupies:
+        /// 2 for a combined texture and sampler, 1 for everything else.
+        /// </summary>
+        private static int ResourcePointerSize(ResourceType type)
+        {
+            return (type == ResourceType.TextureAndSampler ? 2 : 1);
+        }
+
+        /// <summary>
+        /// Returns the current link status.
+        /// </summary>
+        /// <param name="blocking">When true, waits until the status is no longer Incomplete.</param>
+        public ProgramLinkStatus CheckProgramLink(bool blocking)
+        {
+            if (blocking)
+            {
+                // SpinWait backs off and yields instead of burning a core in an empty loop.
+                System.Threading.SpinWait spinner = new();
+
+                while (_status == ProgramLinkStatus.Incomplete)
+                {
+                    spinner.SpinOnce();
+                }
+            }
+
+            return _status;
+        }
+
+        /// <summary>
+        /// Always returns an empty array.
+        /// </summary>
+        public byte[] GetBinary()
+        {
+            return [];
+        }
+
+        /// <summary>
+        /// Stores a render pipeline state under the given key, creating the cache on first use.
+        /// </summary>
+        public void AddGraphicsPipeline(ref PipelineUid key, MTLRenderPipelineState pipeline)
+        {
+            (_graphicsPipelineCache ??= new()).Add(ref key, pipeline);
+        }
+
+        /// <summary>
+        /// Stores the single compute pipeline state, replacing any previous one.
+        /// </summary>
+        public void AddComputePipeline(MTLComputePipelineState pipeline)
+        {
+            _computePipelineCache = pipeline;
+        }
+
+        /// <summary>
+        /// Looks up a cached render pipeline state.
+        /// </summary>
+        /// <returns>True when a pipeline was found for <paramref name="key"/>.</returns>
+        public bool TryGetGraphicsPipeline(ref PipelineUid key, out MTLRenderPipelineState pipeline)
+        {
+            if (_graphicsPipelineCache == null)
+            {
+                pipeline = default;
+                return false;
+            }
+
+            if (!_graphicsPipelineCache.TryGetValue(ref key, out pipeline))
+            {
+                if (_firstBackgroundUse)
+                {
+                    Logger.Warning?.Print(LogClass.Gpu, "Background pipeline compile missed on draw - incorrect pipeline state?");
+                    _firstBackgroundUse = false;
+                }
+
+                return false;
+            }
+
+            _firstBackgroundUse = false;
+
+            return true;
+        }
+
+        /// <summary>
+        /// Retrieves the cached compute pipeline state, if one has been added.
+        /// </summary>
+        /// <returns>True when a compute pipeline is cached.</returns>
+        public bool TryGetComputePipeline(out MTLComputePipelineState pipeline)
+        {
+            if (_computePipelineCache.HasValue)
+            {
+                pipeline = _computePipelineCache.Value;
+                return true;
+            }
+
+            pipeline = default;
+            return false;
+        }
+
+        /// <summary>
+        /// Unregisters the program from the renderer and disposes its cached pipelines and
+        /// shader functions. Does nothing if the program was already unregistered.
+        /// </summary>
+        public void Dispose()
+        {
+            // Removal doubles as the "already disposed" check.
+            if (!_renderer.Programs.Remove(this))
+            {
+                return;
+            }
+
+            if (_graphicsPipelineCache != null)
+            {
+                foreach (MTLRenderPipelineState pipeline in _graphicsPipelineCache.Values)
+                {
+                    pipeline.Dispose();
+                }
+            }
+
+            _computePipelineCache?.Dispose();
+
+            VertexFunction.Dispose();
+            FragmentFunction.Dispose();
+            ComputeFunction.Dispose();
+        }
+    }
+}

+ 22 - 0
src/Ryujinx.Graphics.Metal/ResourceBindingSegment.cs

@@ -0,0 +1,22 @@
+using Ryujinx.Graphics.GAL;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Immutable description of a run of resource bindings: the first binding, how many
+    /// bindings it spans, the resource type, the stages it applies to, and whether it is an array.
+    /// </summary>
+    readonly struct ResourceBindingSegment
+    {
+        public readonly int Binding;
+        public readonly int Count;
+        public readonly ResourceType Type;
+        public readonly ResourceStages Stages;
+        public readonly bool IsArray;
+
+        /// <summary>
+        /// Creates a segment from its five components, stored unchanged.
+        /// </summary>
+        public ResourceBindingSegment(int binding, int count, ResourceType type, ResourceStages stages, bool isArray)
+        {
+            (Binding, Count, Type, Stages, IsArray) = (binding, count, type, stages, isArray);
+        }
+    }
+}

+ 59 - 0
src/Ryujinx.Graphics.Metal/ResourceLayoutBuilder.cs

@@ -0,0 +1,59 @@
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Collections.Generic;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Fluent builder that collects resource descriptors and usages per set and
+    /// turns them into a <see cref="ResourceLayout"/>.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class ResourceLayoutBuilder
+    {
+        private const int TotalSets = MetalRenderer.TotalSets;
+
+        private readonly List<ResourceDescriptor>[] _resourceDescriptors;
+        private readonly List<ResourceUsage>[] _resourceUsages;
+
+        /// <summary>
+        /// Creates a builder with one empty descriptor list and one empty usage list per set.
+        /// </summary>
+        public ResourceLayoutBuilder()
+        {
+            _resourceDescriptors = new List<ResourceDescriptor>[TotalSets];
+            _resourceUsages = new List<ResourceUsage>[TotalSets];
+
+            for (int set = 0; set < TotalSets; set++)
+            {
+                _resourceDescriptors[set] = new List<ResourceDescriptor>();
+                _resourceUsages[set] = new List<ResourceUsage>();
+            }
+        }
+
+        /// <summary>
+        /// Adds a single (non-array) resource to the set selected by its type.
+        /// </summary>
+        /// <param name="stages">Stages the resource is visible to.</param>
+        /// <param name="type">Resource type; decides which set the resource goes into.</param>
+        /// <param name="binding">Binding number of the resource.</param>
+        /// <param name="write">Write flag recorded on the usage entry.</param>
+        /// <returns>This builder, to allow chaining.</returns>
+        /// <exception cref="ArgumentException">The type does not map to any set.</exception>
+        public ResourceLayoutBuilder Add(ResourceStages stages, ResourceType type, int binding, bool write = false)
+        {
+            uint setIndex;
+
+            switch (type)
+            {
+                case ResourceType.UniformBuffer:
+                    setIndex = Constants.ConstantBuffersSetIndex;
+                    break;
+                case ResourceType.StorageBuffer:
+                    setIndex = Constants.StorageBuffersSetIndex;
+                    break;
+                case ResourceType.TextureAndSampler:
+                case ResourceType.BufferTexture:
+                    setIndex = Constants.TexturesSetIndex;
+                    break;
+                case ResourceType.Image:
+                case ResourceType.BufferImage:
+                    setIndex = Constants.ImagesSetIndex;
+                    break;
+                default:
+                    throw new ArgumentException($"Invalid resource type \"{type}\".");
+            }
+
+            var descriptor = new ResourceDescriptor(binding, 1, type, stages);
+            _resourceDescriptors[setIndex].Add(descriptor);
+
+            var usage = new ResourceUsage(binding, 1, type, stages, write);
+            _resourceUsages[setIndex].Add(usage);
+
+            return this;
+        }
+
+        /// <summary>
+        /// Snapshots the collected descriptors and usages into read-only collections.
+        /// </summary>
+        /// <returns>A layout holding one descriptor collection and one usage collection per set.</returns>
+        public ResourceLayout Build()
+        {
+            var descriptorSets = new ResourceDescriptorCollection[TotalSets];
+            var usageSets = new ResourceUsageCollection[TotalSets];
+
+            for (int set = 0; set < TotalSets; set++)
+            {
+                ResourceDescriptor[] setDescriptors = _resourceDescriptors[set].ToArray();
+                descriptorSets[set] = new ResourceDescriptorCollection(setDescriptors.AsReadOnly());
+
+                ResourceUsage[] setUsages = _resourceUsages[set].ToArray();
+                usageSets[set] = new ResourceUsageCollection(setUsages.AsReadOnly());
+            }
+
+            return new ResourceLayout(descriptorSets.AsReadOnly(), usageSets.AsReadOnly());
+        }
+    }
+}

+ 30 - 0
src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj

@@ -0,0 +1,30 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+    <PropertyGroup>
+        <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    </PropertyGroup>
+
+    <ItemGroup>
+        <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
+        <ProjectReference Include="..\Ryujinx.Graphics.GAL\Ryujinx.Graphics.GAL.csproj" />
+    </ItemGroup>
+
+    <ItemGroup>
+      <PackageReference Include="SharpMetal" />
+    </ItemGroup>
+
+    <ItemGroup>
+      <EmbeddedResource Include="Shaders\Blit.metal" />
+      <EmbeddedResource Include="Shaders\BlitMs.metal" />
+      <EmbeddedResource Include="Shaders\ChangeBufferStride.metal" />
+      <EmbeddedResource Include="Shaders\ConvertD32S8ToD24S8.metal" />
+      <EmbeddedResource Include="Shaders\ConvertIndexBuffer.metal" />
+      <EmbeddedResource Include="Shaders\ColorClear.metal" />
+      <EmbeddedResource Include="Shaders\DepthStencilClear.metal" />
+      <EmbeddedResource Include="Shaders\DepthBlit.metal" />
+      <EmbeddedResource Include="Shaders\DepthBlitMs.metal" />
+      <EmbeddedResource Include="Shaders\StencilBlit.metal" />
+      <EmbeddedResource Include="Shaders\StencilBlitMs.metal" />
+    </ItemGroup>
+
+</Project>

+ 90 - 0
src/Ryujinx.Graphics.Metal/SamplerHolder.cs

@@ -0,0 +1,90 @@
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Owns a Metal sampler state created from a <see cref="SamplerCreateInfo"/> and
+    /// registers itself with the renderer for the duration of its lifetime.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class SamplerHolder : ISampler
+    {
+        private readonly MetalRenderer _renderer;
+        private readonly Auto<DisposableSampler> _sampler;
+
+        /// <summary>
+        /// Registers the holder with the renderer and creates the sampler state on the device.
+        /// </summary>
+        /// <param name="renderer">Renderer that tracks this holder in its <c>Samplers</c> collection.</param>
+        /// <param name="device">Device used to create the sampler state.</param>
+        /// <param name="info">Requested sampler parameters.</param>
+        public SamplerHolder(MetalRenderer renderer, MTLDevice device, SamplerCreateInfo info)
+        {
+            _renderer = renderer;
+
+            renderer.Samplers.Add(this);
+
+            (MTLSamplerMinMagFilter minification, MTLSamplerMipFilter mipmapping) = info.MinFilter.Convert();
+
+            // Whether the color could be represented exactly is discarded here.
+            MTLSamplerBorderColor border = GetConstrainedBorderColor(info.BorderColor, out _);
+
+            using var samplerDescriptor = new MTLSamplerDescriptor
+            {
+                BorderColor = border,
+                MinFilter = minification,
+                MagFilter = info.MagFilter.Convert(),
+                MipFilter = mipmapping,
+                CompareFunction = info.CompareOp.Convert(),
+                LodMinClamp = info.MinLod,
+                LodMaxClamp = info.MaxLod,
+                LodAverage = false,
+                // Anisotropy is raised to at least 1.
+                MaxAnisotropy = Math.Max((uint)info.MaxAnisotropy, 1),
+                SAddressMode = info.AddressU.Convert(),
+                TAddressMode = info.AddressV.Convert(),
+                RAddressMode = info.AddressP.Convert(),
+                SupportArgumentBuffers = true
+            };
+
+            var samplerState = device.NewSamplerState(samplerDescriptor);
+
+            _sampler = new Auto<DisposableSampler>(new DisposableSampler(samplerState));
+        }
+
+        /// <summary>
+        /// Maps an arbitrary border color onto one of the three fixed border colors.
+        /// </summary>
+        /// <param name="arbitraryBorderColor">Requested border color.</param>
+        /// <param name="cantConstrain">
+        /// Set to true when the color is not exactly opaque black, transparent black or opaque white;
+        /// opaque black is returned as the fallback in that case.
+        /// </param>
+        private static MTLSamplerBorderColor GetConstrainedBorderColor(ColorF arbitraryBorderColor, out bool cantConstrain)
+        {
+            float red = arbitraryBorderColor.Red;
+            float green = arbitraryBorderColor.Green;
+            float blue = arbitraryBorderColor.Blue;
+            float alpha = arbitraryBorderColor.Alpha;
+
+            bool rgbAllZero = red == 0f && green == 0f && blue == 0f;
+            bool rgbAllOne = red == 1f && green == 1f && blue == 1f;
+
+            if (rgbAllZero && alpha == 1f)
+            {
+                cantConstrain = false;
+                return MTLSamplerBorderColor.OpaqueBlack;
+            }
+
+            if (rgbAllZero && alpha == 0f)
+            {
+                cantConstrain = false;
+                return MTLSamplerBorderColor.TransparentBlack;
+            }
+
+            if (rgbAllOne && alpha == 1f)
+            {
+                cantConstrain = false;
+                return MTLSamplerBorderColor.OpaqueWhite;
+            }
+
+            // No exact match: report it and fall back to opaque black.
+            cantConstrain = true;
+            return MTLSamplerBorderColor.OpaqueBlack;
+        }
+
+        /// <summary>
+        /// Returns the wrapped sampler.
+        /// </summary>
+        public Auto<DisposableSampler> GetSampler() => _sampler;
+
+        /// <summary>
+        /// Unregisters the holder from the renderer and disposes the sampler.
+        /// Does nothing if the holder was already unregistered.
+        /// </summary>
+        public void Dispose()
+        {
+            if (!_renderer.Samplers.Remove(this))
+            {
+                return;
+            }
+
+            _sampler.Dispose();
+        }
+    }
+}

+ 43 - 0
src/Ryujinx.Graphics.Metal/Shaders/Blit.metal

@@ -0,0 +1,43 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+// Output of the vertex stage: clip-space position plus the texture coordinate to sample.
+struct CopyVertexOut {
+    float4 position [[position]];
+    float2 uv;
+};
+
+// Four floats: vertexMain reads data[0..1] for uv.x and data[2..3] for uv.y.
+struct TexCoords {
+    float data[4];
+};
+
+// Argument buffer holding a pointer to the texture coordinates.
+struct ConstantBuffers {
+    constant TexCoords* tex_coord;
+};
+
+// Argument buffer holding the source texture and the sampler used to read it.
+// NOTE(review): FORMAT is not defined in this file; presumably supplied as a macro at compile time - confirm.
+struct Textures
+{
+    texture2d<FORMAT, access::sample> texture;
+    sampler sampler;
+};
+
+// Generates one corner of a quad from the vertex id alone: bit 0 selects the x side,
+// the remaining bits select the y side.
+// NOTE(review): CONSTANT_BUFFERS_INDEX is not defined in this file; presumably a compile-time macro - confirm.
+vertex CopyVertexOut vertexMain(uint vid [[vertex_id]],
+                                constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]]) {
+    CopyVertexOut out;
+
+    int low = vid & 1;
+    int high = vid >> 1;
+    out.uv.x = constant_buffers.tex_coord->data[low];
+    out.uv.y = constant_buffers.tex_coord->data[2 + high];
+    // Maps 0 -> -1 and 1 -> +1.
+    out.position.x = (float(low) - 0.5f) * 2.0f;
+    out.position.y = (float(high) - 0.5f) * 2.0f;
+    out.position.z = 0.0f;
+    out.position.w = 1.0f;
+
+    return out;
+}
+
+// Samples the source texture at the interpolated coordinate and returns the result unchanged.
+// NOTE(review): FORMAT4 and TEXTURES_INDEX are not defined in this file; presumably compile-time macros - confirm.
+fragment FORMAT4 fragmentMain(CopyVertexOut in [[stage_in]],
+                             constant Textures &textures [[buffer(TEXTURES_INDEX)]]) {
+    return textures.texture.sample(textures.sampler, in.uv);
+}

+ 45 - 0
src/Ryujinx.Graphics.Metal/Shaders/BlitMs.metal

@@ -0,0 +1,45 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+// Output of the vertex stage: clip-space position plus the normalized source coordinate.
+struct CopyVertexOut {
+    float4 position [[position]];
+    float2 uv;
+};
+
+// Four floats: vertexMain reads data[0..1] for uv.x and data[2..3] for uv.y.
+struct TexCoords {
+    float data[4];
+};
+
+// Argument buffer holding a pointer to the texture coordinates.
+struct ConstantBuffers {
+    constant TexCoords* tex_coord;
+};
+
+// Argument buffer holding the multisampled source texture (read access only, no sampler).
+// NOTE(review): FORMAT is not defined in this file; presumably supplied as a macro at compile time - confirm.
+struct Textures
+{
+    texture2d_ms<FORMAT, access::read> texture;
+};
+
+// Generates one corner of a quad from the vertex id alone: bit 0 selects the x side,
+// the remaining bits select the y side.
+// NOTE(review): CONSTANT_BUFFERS_INDEX is not defined in this file; presumably a compile-time macro - confirm.
+vertex CopyVertexOut vertexMain(uint vid [[vertex_id]],
+                                constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]]) {
+    CopyVertexOut out;
+
+    int low = vid & 1;
+    int high = vid >> 1;
+    out.uv.x = constant_buffers.tex_coord->data[low];
+    out.uv.y = constant_buffers.tex_coord->data[2 + high];
+    // Maps 0 -> -1 and 1 -> +1.
+    out.position.x = (float(low) - 0.5f) * 2.0f;
+    out.position.y = (float(high) - 0.5f) * 2.0f;
+    out.position.z = 0.0f;
+    out.position.w = 1.0f;
+
+    return out;
+}
+
+// Reads the texel for the current sample: the interpolated coordinate is scaled by the
+// texture size and truncated to integer texel coordinates, then read at sample_id.
+// NOTE(review): FORMAT4 and TEXTURES_INDEX are not defined in this file; presumably compile-time macros - confirm.
+fragment FORMAT4 fragmentMain(CopyVertexOut in [[stage_in]],
+                             constant Textures &textures [[buffer(TEXTURES_INDEX)]],
+                             uint sample_id [[sample_id]]) {
+    uint2 tex_size = uint2(textures.texture.get_width(), textures.texture.get_height());
+    uint2 tex_coord = uint2(in.uv * float2(tex_size));
+    return textures.texture.read(tex_coord, sample_id);
+}

+ 72 - 0
src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal

@@ -0,0 +1,72 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+// Kernel parameters packed into one int4, as read by kernelMain:
+// x = source stride, y = target stride, z = buffer size, w = source offset.
+struct StrideArguments {
+    int4 data;
+};
+
+// Source bytes. Declared with one element but indexed by kernelMain as a byte array.
+struct InData {
+    uint8_t data[1];
+};
+
+// Destination bytes. Declared with one element but indexed by kernelMain as a byte array.
+struct OutData {
+    uint8_t data[1];
+};
+
+// Buffer bound at CONSTANT_BUFFERS_INDEX; holds a pointer to the kernel parameters.
+struct ConstantBuffers {
+    constant StrideArguments* stride_arguments;
+};
+
+// Buffer bound at STORAGE_BUFFERS_INDEX; holds pointers to the input and output data.
+struct StorageBuffers {
+    device InData* in_data;
+    device OutData* out_data;
+};
+
+// Copies (bufferSize / sourceStride) elements of sourceStride bytes from in_data, starting at
+// sourceOffset, into out_data. After each element, (targetStride - sourceStride) zero bytes
+// are appended. The copies are split as evenly as possible between
+// threads_per_threadgroup.x * threadgroups_per_grid.x invocations; each invocation handles
+// one contiguous run of elements.
+kernel void kernelMain(constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]],
+                       device StorageBuffers &storage_buffers [[buffer(STORAGE_BUFFERS_INDEX)]],
+                       uint3 thread_position_in_grid [[thread_position_in_grid]],
+                       uint3 threads_per_threadgroup [[threads_per_threadgroup]],
+                       uint3 threadgroups_per_grid [[threadgroups_per_grid]])
+{
+    // Determine what slice of the stride copies this invocation will perform.
+
+    int sourceStride = constant_buffers.stride_arguments->data.x;
+    int targetStride = constant_buffers.stride_arguments->data.y;
+    int bufferSize = constant_buffers.stride_arguments->data.z;
+    int sourceOffset = constant_buffers.stride_arguments->data.w;
+
+    // Number of zero bytes written after each copied element.
+    int strideRemainder = targetStride - sourceStride;
+    // Total invocation count along X only; Y and Z are not taken into account.
+    int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x);
+
+    int copiesRequired = bufferSize / sourceStride;
+
+    // Find the copies that this invocation should perform.
+
+    // - Copies that all invocations perform.
+    int allInvocationCopies = copiesRequired / invocations;
+
+    // - Extra remainder copy that this invocation performs.
+    //   The first (copiesRequired % invocations) invocations take one extra copy each.
+    int index = int(thread_position_in_grid.x);
+    int extra = (index < (copiesRequired % invocations)) ? 1 : 0;
+
+    int copyCount = allInvocationCopies + extra;
+
+    // Finally, get the starting offset. Make sure to count extra copies.
+    // Every lower-indexed invocation that took an extra copy shifts this start by one.
+
+    int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index);
+
+    int srcOffset = sourceOffset + startCopy * sourceStride;
+    int dstOffset = startCopy * targetStride;
+
+    // Perform the copies for this region
+    for (int i = 0; i < copyCount; i++) {
+        // Copy the element payload byte by byte.
+        for (int j = 0; j < sourceStride; j++) {
+            storage_buffers.out_data->data[dstOffset++] = storage_buffers.in_data->data[srcOffset++];
+        }
+
+        // Zero the rest of the output element (no iterations when strideRemainder <= 0).
+        for (int j = 0; j < strideRemainder; j++) {
+            storage_buffers.out_data->data[dstOffset++] = uint8_t(0);
+        }
+    }
+}

+ 38 - 0
src/Ryujinx.Graphics.Metal/Shaders/ColorClear.metal

@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+// Output of vertexMain; only the clip-space position is produced.
+struct VertexOut {
+    float4 position [[position]];
+};
+
+// Clear color value; FORMAT4 is a macro supplied when the shader is compiled.
+struct ClearColor {
+    FORMAT4 data;
+};
+
+// Buffer bound at CONSTANT_BUFFERS_INDEX; holds a pointer to the clear color.
+struct ConstantBuffers {
+    constant ClearColor* clear_color;
+};
+
+// Vertex stage: derives a clip-space corner from the vertex id alone.
+// Bit 0 of the id picks the X side and the remaining bits pick the Y side, so ids 0-3
+// produce the corners (-1,-1), (1,-1), (-1,1) and (1,1).
+vertex VertexOut vertexMain(ushort vid [[vertex_id]]) {
+    int xBit = vid & 1;
+    int yBit = vid >> 1;
+
+    VertexOut out;
+    out.position = float4((float(xBit) - 0.5f) * 2.0f,
+                          (float(yBit) - 0.5f) * 2.0f,
+                          0.0f,
+                          1.0f);
+
+    return out;
+}
+
+// Fragment output targeting the color attachment selected by COLOR_ATTACHMENT_INDEX.
+struct FragmentOut {
+    FORMAT4 color [[color(COLOR_ATTACHMENT_INDEX)]];
+};
+
+// Fragment stage: writes the clear color from the constant buffer to the selected attachment.
+fragment FragmentOut fragmentMain(VertexOut in [[stage_in]],
+                                  constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]]) {
+    FragmentOut out;
+
+    out.color = constant_buffers.clear_color->data;
+
+    return out;
+}

+ 66 - 0
src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal

@@ -0,0 +1,66 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+// Kernel parameters: number of pixels to convert, and the index of the first
+// output element (in uints) that kernelMain writes to.
+struct StrideArguments {
+    int pixelCount;
+    int dstStartOffset;
+};
+
+// Source data. Declared with one element but indexed by kernelMain as a uint array
+// holding two uints per pixel (depth bits, then stencil).
+struct InData {
+    uint data[1];
+};
+
+// Destination data. Declared with one element but indexed by kernelMain as a uint array
+// holding one uint per pixel.
+struct OutData {
+    uint data[1];
+};
+
+// Buffer bound at CONSTANT_BUFFERS_INDEX; holds a pointer to the kernel parameters.
+struct ConstantBuffers {
+    constant StrideArguments* stride_arguments;
+};
+
+// Buffer bound at STORAGE_BUFFERS_INDEX; holds pointers to the input and output data.
+struct StorageBuffers {
+    device InData* in_data;
+    device OutData* out_data;
+};
+
+// Converts pixelCount pixels from a two-uint layout (32-bit float depth, then stencil) into a
+// single packed uint per pixel: 24-bit normalized depth in the upper bits and the low 8 bits of
+// the stencil in the lower bits. The pixels are split as evenly as possible between
+// threads_per_threadgroup.x * threadgroups_per_grid.x invocations; each invocation handles
+// one contiguous run of pixels.
+kernel void kernelMain(constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]],
+                       device StorageBuffers &storage_buffers [[buffer(STORAGE_BUFFERS_INDEX)]],
+                       uint3 thread_position_in_grid [[thread_position_in_grid]],
+                       uint3 threads_per_threadgroup [[threads_per_threadgroup]],
+                       uint3 threadgroups_per_grid [[threadgroups_per_grid]])
+{
+    // Determine what slice of the pixel conversions this invocation will perform.
+    // Total invocation count along X only; Y and Z are not taken into account.
+    int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x);
+
+    int copiesRequired = constant_buffers.stride_arguments->pixelCount;
+
+    // Find the copies that this invocation should perform.
+
+    // - Copies that all invocations perform.
+    int allInvocationCopies = copiesRequired / invocations;
+
+    // - Extra remainder copy that this invocation performs.
+    //   The first (copiesRequired % invocations) invocations take one extra pixel each.
+    int index = int(thread_position_in_grid.x);
+    int extra = (index < (copiesRequired % invocations)) ? 1 : 0;
+
+    int copyCount = allInvocationCopies + extra;
+
+    // Finally, get the starting offset. Make sure to count extra copies.
+
+    int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index);
+
+    // The source has two uints per pixel, the destination one.
+    int srcOffset = startCopy * 2;
+    int dstOffset = constant_buffers.stride_arguments->dstStartOffset + startCopy;
+
+    // Perform the conversion for this region.
+    for (int i = 0; i < copyCount; i++)
+    {
+        // First uint holds the raw bits of the float depth, second uint holds the stencil.
+        float depth = as_type<float>(storage_buffers.in_data->data[srcOffset++]);
+        uint stencil = storage_buffers.in_data->data[srcOffset++];
+
+        // Clamp to [0, 1] and scale to the 24-bit range (16777215 == 2^24 - 1).
+        uint rescaledDepth = uint(clamp(depth, 0.0, 1.0) * 16777215.0);
+
+        // Depth goes to bits 8-31, stencil to bits 0-7.
+        storage_buffers.out_data->data[dstOffset++] = (rescaledDepth << 8) | (stencil & 0xff);
+    }
+}

+ 59 - 0
src/Ryujinx.Graphics.Metal/Shaders/ConvertIndexBuffer.metal

@@ -0,0 +1,59 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+// Parameters of the index buffer conversion, as consumed by kernelMain:
+// - pattern:              per-output-vertex index offsets added to the primitive's base input index
+// - primitiveVertices:    not read by kernelMain
+// - primitiveVerticesOut: number of indices written per primitive
+// - indexSize:            size in bytes of one input index
+// - indexSizeOut:         size in bytes of one output index
+// - baseIndex:            offset added to every input index position
+// - indexStride:          input index positions advanced per primitive
+// - srcOffset:            byte offset of the first index inside the input data
+// - totalPrimitives:      number of primitives to convert; extra invocations exit early
+struct IndexBufferPattern {
+    int pattern[8];
+    int primitiveVertices;
+    int primitiveVerticesOut;
+    int indexSize;
+    int indexSizeOut;
+    int baseIndex;
+    int indexStride;
+    int srcOffset;
+    int totalPrimitives;
+};
+
+// Source bytes. Declared with one element but indexed by kernelMain as a byte array.
+struct InData {
+    uint8_t data[1];
+};
+
+// Destination bytes. Declared with one element but indexed by kernelMain as a byte array.
+struct OutData {
+    uint8_t data[1];
+};
+
+// Buffer bound at STORAGE_BUFFERS_INDEX; holds pointers to the input data, output data
+// and conversion parameters.
+struct StorageBuffers {
+    device InData* in_data;
+    device OutData* out_data;
+    constant IndexBufferPattern* index_buffer_pattern;
+};
+
+// Converts one primitive per invocation: for each output vertex, the input index selected by
+// the pattern is copied byte by byte to the output, and the output index is zero-padded
+// up to indexSizeOut bytes.
+kernel void kernelMain(device StorageBuffers &storage_buffers [[buffer(STORAGE_BUFFERS_INDEX)]],
+                       uint3 thread_position_in_grid [[thread_position_in_grid]])
+{
+    constant IndexBufferPattern* params = storage_buffers.index_buffer_pattern;
+
+    // Invocations beyond the last primitive have nothing to do.
+    int primitive = int(thread_position_in_grid.x);
+    if (primitive >= params->totalPrimitives)
+    {
+        return;
+    }
+
+    int verticesOut = params->primitiveVerticesOut;
+    int sizeIn = params->indexSize;
+    int sizeOut = params->indexSizeOut;
+
+    // Index position (not bytes) of this primitive's first input index, and of its first output index.
+    int firstIn = primitive * params->indexStride + params->baseIndex;
+    int firstOut = primitive * verticesOut;
+
+    for (int vertex = 0; vertex < verticesOut; vertex++)
+    {
+        // Negative input index positions are clamped to zero before scaling to bytes.
+        int srcByte = params->srcOffset + max(0, firstIn + params->pattern[vertex]) * sizeIn;
+        int dstByte = (firstOut + vertex) * sizeOut;
+
+        int b = 0;
+
+        // Copy the bytes of the input index.
+        while (b < sizeIn)
+        {
+            storage_buffers.out_data->data[dstByte + b] = storage_buffers.in_data->data[srcByte + b];
+            b++;
+        }
+
+        // Zero the remaining bytes when the output index is wider than the input index.
+        while (b < sizeOut)
+        {
+            storage_buffers.out_data->data[dstByte + b] = uint8_t(0);
+            b++;
+        }
+    }
+}

+ 27 - 0
src/Ryujinx.Graphics.Metal/Shaders/DepthBlit.metal

@@ -0,0 +1,27 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+// Interpolated vertex data received by the fragment stage.
+struct CopyVertexOut {
+    float4 position [[position]];
+    float2 uv;
+};
+
+// Buffer bound at TEXTURES_INDEX; holds the source texture and the sampler used to read it.
+struct Textures
+{
+    texture2d<float, access::sample> texture;
+    sampler sampler;
+};
+
+// Fragment output that writes the fragment depth.
+struct FragmentOut {
+    float depth [[depth(any)]];
+};
+
+// Fragment stage: samples the source texture at the interpolated UV and writes
+// its red channel as the fragment depth.
+fragment FragmentOut fragmentMain(CopyVertexOut in [[stage_in]],
+                             constant Textures &textures [[buffer(TEXTURES_INDEX)]]) {
+    float4 texel = textures.texture.sample(textures.sampler, in.uv);
+
+    FragmentOut out;
+    out.depth = texel.r;
+
+    return out;
+}

+ 29 - 0
src/Ryujinx.Graphics.Metal/Shaders/DepthBlitMs.metal

@@ -0,0 +1,29 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+// Interpolated vertex data received by the fragment stage.
+struct CopyVertexOut {
+    float4 position [[position]];
+    float2 uv;
+};
+
+// Buffer bound at TEXTURES_INDEX; holds the multisampled source texture (read access only).
+struct Textures
+{
+    texture2d_ms<float, access::read> texture;
+};
+
+// Fragment output that writes the fragment depth.
+struct FragmentOut {
+    float depth [[depth(any)]];
+};
+
+// Fragment stage: converts the interpolated UV into an integer texel coordinate
+// (UV scaled by the texture size), reads the sample matching the current sample id,
+// and writes its red channel as the fragment depth.
+fragment FragmentOut fragmentMain(CopyVertexOut in [[stage_in]],
+                             constant Textures &textures [[buffer(TEXTURES_INDEX)]],
+                             uint sample_id [[sample_id]]) {
+    uint2 extent = uint2(textures.texture.get_width(), textures.texture.get_height());
+    uint2 texel = uint2(in.uv * float2(extent));
+    float4 value = textures.texture.read(texel, sample_id);
+
+    FragmentOut out;
+    out.depth = value.r;
+
+    return out;
+}

+ 42 - 0
src/Ryujinx.Graphics.Metal/Shaders/DepthStencilClear.metal

@@ -0,0 +1,42 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+// Output of vertexMain; only the clip-space position is produced.
+struct VertexOut {
+    float4 position [[position]];
+};
+
+// Fragment output that writes the fragment depth.
+struct FragmentOut {
+    float depth [[depth(any)]];
+};
+
+// Depth value written by fragmentMain.
+struct ClearDepth {
+    float data;
+};
+
+// Buffer bound at CONSTANT_BUFFERS_INDEX; holds a pointer to the clear depth value.
+struct ConstantBuffers {
+    constant ClearDepth* clear_depth;
+};
+
+// Vertex stage: derives a clip-space corner from the vertex id alone.
+// Bit 0 of the id picks the X side and the remaining bits pick the Y side, so ids 0-3
+// produce the corners (-1,-1), (1,-1), (-1,1) and (1,1).
+vertex VertexOut vertexMain(ushort vid [[vertex_id]]) {
+    int xBit = vid & 1;
+    int yBit = vid >> 1;
+
+    VertexOut out;
+    out.position = float4((float(xBit) - 0.5f) * 2.0f,
+                          (float(yBit) - 0.5f) * 2.0f,
+                          0.0f,
+                          1.0f);
+
+    return out;
+}
+
+// Fragment stage: writes the clear depth value from the constant buffer as the fragment depth.
+fragment FragmentOut fragmentMain(VertexOut in [[stage_in]],
+                                  constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]]) {
+    float clearValue = constant_buffers.clear_depth->data;
+
+    FragmentOut out;
+    out.depth = clearValue;
+
+    return out;
+}

+ 27 - 0
src/Ryujinx.Graphics.Metal/Shaders/StencilBlit.metal

@@ -0,0 +1,27 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+// Interpolated vertex data received by the fragment stage.
+struct CopyVertexOut {
+    float4 position [[position]];
+    float2 uv;
+};
+
+// Buffer bound at TEXTURES_INDEX; holds the unsigned-integer source texture and the sampler used to read it.
+struct Textures
+{
+    texture2d<uint, access::sample> texture;
+    sampler sampler;
+};
+
+// Fragment output that writes the stencil value.
+struct FragmentOut {
+    uint stencil [[stencil]];
+};
+
+// Fragment stage: samples the source texture at the interpolated UV and writes
+// its red channel as the stencil value.
+fragment FragmentOut fragmentMain(CopyVertexOut in [[stage_in]],
+                             constant Textures &textures [[buffer(TEXTURES_INDEX)]]) {
+    uint4 texel = textures.texture.sample(textures.sampler, in.uv);
+
+    FragmentOut out;
+    out.stencil = texel.r;
+
+    return out;
+}

+ 29 - 0
src/Ryujinx.Graphics.Metal/Shaders/StencilBlitMs.metal

@@ -0,0 +1,29 @@
+#include <metal_stdlib>
+
+using namespace metal;
+
+// Interpolated vertex data received by the fragment stage.
+struct CopyVertexOut {
+    float4 position [[position]];
+    float2 uv;
+};
+
+// Buffer bound at TEXTURES_INDEX; holds the multisampled unsigned-integer source texture (read access only).
+struct Textures
+{
+    texture2d_ms<uint, access::read> texture;
+};
+
+// Fragment output that writes the stencil value.
+struct FragmentOut {
+    uint stencil [[stencil]];
+};
+
+// Fragment stage: converts the interpolated UV into an integer texel coordinate
+// (UV scaled by the texture size), reads the sample matching the current sample id,
+// and writes its red channel as the stencil value.
+fragment FragmentOut fragmentMain(CopyVertexOut in [[stage_in]],
+                             constant Textures &textures [[buffer(TEXTURES_INDEX)]],
+                             uint sample_id [[sample_id]]) {
+    uint2 extent = uint2(textures.texture.get_width(), textures.texture.get_height());
+    uint2 texel = uint2(in.uv * float2(extent));
+    uint4 value = textures.texture.read(texel, sample_id);
+
+    FragmentOut out;
+    out.stencil = value.r;
+
+    return out;
+}

+ 288 - 0
src/Ryujinx.Graphics.Metal/StagingBuffer.cs

@@ -0,0 +1,288 @@
+using Ryujinx.Common;
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Describes a range that was reserved inside a staging buffer.
+    /// </summary>
+    readonly struct StagingBufferReserved
+    {
+        /// <summary>Buffer holder that contains the reserved range.</summary>
+        public readonly BufferHolder Buffer;
+
+        /// <summary>Offset of the reserved range inside <see cref="Buffer"/>.</summary>
+        public readonly int Offset;
+
+        /// <summary>Size of the reserved range.</summary>
+        public readonly int Size;
+
+        /// <summary>
+        /// Creates a new reserved range description.
+        /// </summary>
+        /// <param name="buffer">Buffer holder that contains the range</param>
+        /// <param name="offset">Offset of the range inside the buffer</param>
+        /// <param name="size">Size of the range</param>
+        public StagingBufferReserved(BufferHolder buffer, int offset, int size)
+        {
+            (Buffer, Offset, Size) = (buffer, offset, size);
+        }
+    }
+
+    [SupportedOSPlatform("macos")]
+    class StagingBuffer : IDisposable
+    {
+        private const int BufferSize = 32 * 1024 * 1024;
+
+        private int _freeOffset;
+        private int _freeSize;
+
+        private readonly MetalRenderer _renderer;
+        private readonly BufferHolder _buffer;
+        private readonly int _resourceAlignment;
+
+        public readonly BufferHandle Handle;
+
+        /// <summary>
+        /// A region of the staging buffer that stays in use until its fence is signalled.
+        /// </summary>
+        private readonly struct PendingCopy
+        {
+            /// <summary>Fence of the command buffer that uses the region.</summary>
+            public FenceHolder Fence { get; }
+
+            /// <summary>Number of bytes returned to the free space once the fence is signalled.</summary>
+            public int Size { get; }
+
+            /// <summary>
+            /// Creates the pending copy and calls <c>Get()</c> on the fence; the matching
+            /// <c>Put()</c> is issued when the entry is removed from the queue.
+            /// </summary>
+            /// <param name="fence">Fence of the command buffer that uses the region</param>
+            /// <param name="size">Number of bytes occupied by the region</param>
+            public PendingCopy(FenceHolder fence, int size)
+            {
+                // NOTE(review): Get()/Put() presumably reference-count the fence - confirm in FenceHolder.
+                fence.Get();
+
+                Size = size;
+                Fence = fence;
+            }
+        }
+
+        private readonly Queue<PendingCopy> _pendingCopies;
+
+        /// <summary>
+        /// Creates the staging buffer, allocating its backing buffer of <see cref="BufferSize"/> bytes.
+        /// </summary>
+        /// <param name="renderer">Renderer that owns this staging buffer</param>
+        /// <param name="bufferManager">Buffer manager used to create the backing buffer</param>
+        public StagingBuffer(MetalRenderer renderer, BufferManager bufferManager)
+        {
+            _renderer = renderer;
+            _resourceAlignment = Constants.MinResourceAlignment;
+            _pendingCopies = new Queue<PendingCopy>();
+
+            // The whole buffer starts out free.
+            _freeSize = BufferSize;
+
+            Handle = bufferManager.CreateWithHandle(BufferSize, out _buffer);
+        }
+
+        /// <summary>
+        /// Copies data into a destination buffer through the staging buffer. The data is split into
+        /// several chunks when it does not fit in the free space at once, waiting for earlier
+        /// copies to complete as needed.
+        /// </summary>
+        /// <param name="cbp">Command buffer pool used to rent command buffers and to check pending fences</param>
+        /// <param name="cbs">Command buffer to record the copies on, or null to rent one from the pool</param>
+        /// <param name="dst">Destination buffer</param>
+        /// <param name="dstOffset">Offset into the destination buffer where the data is written</param>
+        /// <param name="data">Data to copy</param>
+        public void PushData(CommandBufferPool cbp, CommandBufferScoped? cbs, BufferHolder dst, int dstOffset, ReadOnlySpan<byte> data)
+        {
+            // isRender is true while the copies go to the caller supplied command buffer,
+            // which must not be disposed here.
+            bool isRender = cbs != null;
+            CommandBufferScoped scoped = cbs ?? cbp.Rent();
+
+            // Must push all data to the buffer. If it can't fit, split it up.
+
+            while (data.Length > 0)
+            {
+                // Try to reclaim space from copies that have already completed.
+                if (_freeSize < data.Length)
+                {
+                    FreeCompleted();
+                }
+
+                // No space at all: the oldest pending copy has to complete first.
+                while (_freeSize == 0)
+                {
+                    if (!WaitFreeCompleted(cbp))
+                    {
+                        // The oldest pending copy belongs to a command buffer that is still rented,
+                        // so work has to be submitted before its fence can be waited on.
+                        if (isRender)
+                        {
+                            // Flush everything, then continue on a command buffer rented here;
+                            // from now on it is owned (and disposed) by this method.
+                            _renderer.FlushAllCommands();
+                            scoped = cbp.Rent();
+                            isRender = false;
+                        }
+                        else
+                        {
+                            scoped = cbp.ReturnAndRent(scoped);
+                        }
+                    }
+                }
+
+                // Push as much as currently fits.
+                int chunkSize = Math.Min(_freeSize, data.Length);
+
+                PushDataImpl(scoped, dst, dstOffset, data[..chunkSize]);
+
+                dstOffset += chunkSize;
+                data = data[chunkSize..];
+            }
+
+            // Only dispose command buffers that were rented by this method.
+            if (!isRender)
+            {
+                scoped.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Writes data at the current free offset of the staging buffer and records the copy
+        /// from there into the destination buffer. The write wraps around to the start of the
+        /// staging buffer when it reaches the end.
+        /// Callers in this class check that the data fits in the free space before calling.
+        /// </summary>
+        /// <param name="cbs">Command buffer to record the copy on</param>
+        /// <param name="dst">Destination buffer</param>
+        /// <param name="dstOffset">Offset into the destination buffer where the data is written</param>
+        /// <param name="data">Data to copy</param>
+        private void PushDataImpl(CommandBufferScoped cbs, BufferHolder dst, int dstOffset, ReadOnlySpan<byte> data)
+        {
+            var srcBuffer = _buffer.GetBuffer();
+            // NOTE(review): the last argument presumably flags the range as written - confirm in BufferHolder.GetBuffer.
+            var dstBuffer = dst.GetBuffer(dstOffset, data.Length, true);
+
+            int offset = _freeOffset;
+            // Bytes available between the free offset and the end of the staging buffer.
+            int capacity = BufferSize - offset;
+            if (capacity < data.Length)
+            {
+                // The data crosses the end of the buffer: split it in two parts,
+                // the second one placed at the start of the buffer.
+                _buffer.SetDataUnchecked(offset, data[..capacity]);
+                _buffer.SetDataUnchecked(0, data[capacity..]);
+
+                BufferHolder.Copy(cbs, srcBuffer, dstBuffer, offset, dstOffset, capacity);
+                BufferHolder.Copy(cbs, srcBuffer, dstBuffer, 0, dstOffset + capacity, data.Length - capacity);
+            }
+            else
+            {
+                _buffer.SetDataUnchecked(offset, data);
+
+                BufferHolder.Copy(cbs, srcBuffer, dstBuffer, offset, dstOffset, data.Length);
+            }
+
+            // BufferSize is a power of two, so the mask wraps the offset around.
+            _freeOffset = (offset + data.Length) & (BufferSize - 1);
+            _freeSize -= data.Length;
+            Debug.Assert(_freeSize >= 0);
+
+            // The space is given back once the fence of this command buffer is signalled.
+            _pendingCopies.Enqueue(new PendingCopy(cbs.GetFence(), data.Length));
+        }
+
+        /// <summary>
+        /// Copies data into a destination buffer through the staging buffer, but only if all of it
+        /// fits in the free space right now. Never waits for pending copies.
+        /// </summary>
+        /// <param name="cbs">Command buffer to record the copy on</param>
+        /// <param name="dst">Destination buffer</param>
+        /// <param name="dstOffset">Offset into the destination buffer where the data is written</param>
+        /// <param name="data">Data to copy</param>
+        /// <returns>True if the data was pushed, false if there was not enough space</returns>
+        public bool TryPushData(CommandBufferScoped cbs, BufferHolder dst, int dstOffset, ReadOnlySpan<byte> data)
+        {
+            int length = data.Length;
+
+            if (length > BufferSize)
+            {
+                return false;
+            }
+
+            bool fits = _freeSize >= length;
+
+            if (!fits)
+            {
+                // Reclaim the space of completed copies and check again.
+                FreeCompleted();
+
+                fits = _freeSize >= length;
+            }
+
+            if (!fits)
+            {
+                return false;
+            }
+
+            PushDataImpl(cbs, dst, dstOffset, data);
+
+            return true;
+        }
+
+        /// <summary>
+        /// Reserves a contiguous, aligned range of the staging buffer for the given command buffer.
+        /// The range is placed at the aligned free offset when it fits there, otherwise at the
+        /// start of the buffer.
+        /// </summary>
+        /// <param name="cbs">Command buffer whose fence releases the reservation</param>
+        /// <param name="size">Size of the range to reserve</param>
+        /// <param name="alignment">Required alignment of the range offset</param>
+        /// <returns>The reserved range</returns>
+        private StagingBufferReserved ReserveDataImpl(CommandBufferScoped cbs, int size, int alignment)
+        {
+            // Assumes the caller has already determined that there is enough space.
+            int offset = BitUtils.AlignUp(_freeOffset, alignment);
+            // Bytes skipped to reach the aligned offset; they are reserved together with the range.
+            int padding = offset - _freeOffset;
+
+            // Bytes usable from the aligned offset, limited by the free size and the end of the buffer.
+            int capacity = Math.Min(_freeSize, BufferSize - offset);
+            int reservedLength = size + padding;
+            if (capacity < size)
+            {
+                offset = 0; // Place at start.
+                // The bytes skipped before wrapping are reserved as well.
+                reservedLength += capacity;
+            }
+
+            // BufferSize is a power of two, so the mask wraps the offset around.
+            _freeOffset = (_freeOffset + reservedLength) & (BufferSize - 1);
+            _freeSize -= reservedLength;
+            Debug.Assert(_freeSize >= 0);
+
+            // The whole reserved length is given back once the fence of this command buffer is signalled.
+            _pendingCopies.Enqueue(new PendingCopy(cbs.GetFence(), reservedLength));
+
+            return new StagingBufferReserved(_buffer, offset, size);
+        }
+
+        /// <summary>
+        /// Gets the size of the largest contiguous free range that can hold an aligned reservation.
+        /// </summary>
+        /// <param name="alignment">Required alignment of the range offset</param>
+        /// <returns>The larger of the two candidate free ranges</returns>
+        private int GetContiguousFreeSize(int alignment)
+        {
+            int alignedStart = BitUtils.AlignUp(_freeOffset, alignment);
+            int alignmentPadding = alignedStart - _freeOffset;
+
+            // Offset where the free area ends, wrapped around the buffer size.
+            int freeEnd = (_freeOffset + _freeSize) & (BufferSize - 1);
+
+            // Candidate 1: from the aligned free offset up to the end of the buffer,
+            // limited by the free size minus the alignment padding.
+            int tailSpace = Math.Min(_freeSize - alignmentPadding, BufferSize - alignedStart);
+
+            // Candidate 2: from offset 0 up to the wrapped end of the free area,
+            // which only exists if the free area contains offset 0.
+            int headSpace = 0;
+
+            if (freeEnd <= _freeOffset)
+            {
+                headSpace = Math.Min(_freeSize, freeEnd);
+            }
+
+            return Math.Max(tailSpace, headSpace);
+        }
+
+        /// <summary>
+        /// Tries to reserve a contiguous range on the staging buffer for the current command buffer.
+        /// No data is written; the caller fills the returned range.
+        /// </summary>
+        /// <param name="cbs">Command buffer to reserve the data on</param>
+        /// <param name="size">The minimum size the reserved data requires</param>
+        /// <param name="alignment">The required alignment for the buffer offset</param>
+        /// <returns>The reserved range of the staging buffer, or null if there is not enough contiguous space</returns>
+        public StagingBufferReserved? TryReserveData(CommandBufferScoped cbs, int size, int alignment)
+        {
+            if (size > BufferSize)
+            {
+                return null;
+            }
+
+            // Temporary reserved data cannot be fragmented, so only contiguous space counts.
+            bool hasRoom = GetContiguousFreeSize(alignment) >= size;
+
+            if (!hasRoom)
+            {
+                // Reclaim the space of completed copies and check again.
+                FreeCompleted();
+
+                hasRoom = GetContiguousFreeSize(alignment) >= size;
+            }
+
+            if (!hasRoom)
+            {
+                Logger.Debug?.PrintMsg(LogClass.Gpu, $"Staging buffer out of space to reserve data of size {size}.");
+                return null;
+            }
+
+            return ReserveDataImpl(cbs, size, alignment);
+        }
+
+        /// <summary>
+        /// Tries to reserve a contiguous range on the staging buffer for the current command buffer,
+        /// using the default resource alignment of this staging buffer.
+        /// No data is written; the caller fills the returned range.
+        /// </summary>
+        /// <param name="cbs">Command buffer to reserve the data on</param>
+        /// <param name="size">The minimum size the reserved data requires</param>
+        /// <returns>The reserved range of the staging buffer, or null if there is not enough contiguous space</returns>
+        public StagingBufferReserved? TryReserveData(CommandBufferScoped cbs, int size)
+            => TryReserveData(cbs, size, _resourceAlignment);
+
+        /// <summary>
+        /// Waits for the oldest pending copy to complete and gives its space back to the free area.
+        /// </summary>
+        /// <param name="cbp">Command buffer pool used to check whether the fence belongs to a rented command buffer</param>
+        /// <returns>
+        /// False if the oldest pending copy is not signalled and its fence is on a command buffer
+        /// that is still rented (so it cannot be waited on), true otherwise
+        /// </returns>
+        private bool WaitFreeCompleted(CommandBufferPool cbp)
+        {
+            if (!_pendingCopies.TryPeek(out var oldest))
+            {
+                // Nothing is pending.
+                return true;
+            }
+
+            if (!oldest.Fence.IsSignaled())
+            {
+                if (cbp.IsFenceOnRentedCommandBuffer(oldest.Fence))
+                {
+                    return false;
+                }
+
+                oldest.Fence.Wait();
+            }
+
+            var removed = _pendingCopies.Dequeue();
+            Debug.Assert(removed.Fence == oldest.Fence);
+
+            _freeSize += oldest.Size;
+            oldest.Fence.Put();
+
+            return true;
+        }
+
+        /// <summary>
+        /// Gives back the space of all pending copies at the front of the queue whose fence is
+        /// already signalled. Stops at the first pending copy that has not completed; never waits.
+        /// </summary>
+        public void FreeCompleted()
+        {
+            // Last fence found to be signalled; consecutive entries sharing it skip the IsSignaled() call.
+            FenceHolder signalledFence = null;
+            while (_pendingCopies.TryPeek(out var pc) && (pc.Fence == signalledFence || pc.Fence.IsSignaled()))
+            {
+                signalledFence = pc.Fence; // Already checked - don't need to do it again.
+                var dequeued = _pendingCopies.Dequeue();
+                Debug.Assert(dequeued.Fence == pc.Fence);
+                _freeSize += pc.Size;
+                // Matches the Get() issued when the pending copy was created.
+                pc.Fence.Put();
+            }
+        }
+
+        /// <summary>
+        /// Deletes the backing buffer and calls <c>Put()</c> on the fence of every copy
+        /// that is still pending, emptying the queue.
+        /// </summary>
+        public void Dispose()
+        {
+            _renderer.BufferManager.Delete(Handle);
+
+            while (_pendingCopies.Count > 0)
+            {
+                _pendingCopies.Dequeue().Fence.Put();
+            }
+        }
+    }
+}

+ 110 - 0
src/Ryujinx.Graphics.Metal/State/DepthStencilUid.cs

@@ -0,0 +1,110 @@
+using SharpMetal.Metal;
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Graphics.Metal.State
+{
+    /// <summary>
+    /// Packed stencil state of one face: read mask, write mask, and four 4-bit
+    /// fields stored in <see cref="Operations"/>.
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential, Pack = 1)]
+    public struct StencilUid
+    {
+        public uint ReadMask;
+        public uint WriteMask;
+
+        /// <summary>
+        /// Bits 0-3: stencil failure operation, bits 4-7: depth failure operation,
+        /// bits 8-11: depth/stencil pass operation, bits 12-15: stencil compare function.
+        /// </summary>
+        public ushort Operations;
+
+        /// <summary>Stencil failure operation (bits 0-3 of <see cref="Operations"/>).</summary>
+        public MTLStencilOperation StencilFailureOperation
+        {
+            readonly get => (MTLStencilOperation)ReadField(0);
+            set => WriteField(0, (int)value);
+        }
+
+        /// <summary>Depth failure operation (bits 4-7 of <see cref="Operations"/>).</summary>
+        public MTLStencilOperation DepthFailureOperation
+        {
+            readonly get => (MTLStencilOperation)ReadField(4);
+            set => WriteField(4, (int)value);
+        }
+
+        /// <summary>Depth/stencil pass operation (bits 8-11 of <see cref="Operations"/>).</summary>
+        public MTLStencilOperation DepthStencilPassOperation
+        {
+            readonly get => (MTLStencilOperation)ReadField(8);
+            set => WriteField(8, (int)value);
+        }
+
+        /// <summary>Stencil compare function (bits 12-15 of <see cref="Operations"/>).</summary>
+        public MTLCompareFunction StencilCompareFunction
+        {
+            readonly get => (MTLCompareFunction)ReadField(12);
+            set => WriteField(12, (int)value);
+        }
+
+        /// <summary>Reads the 4-bit field of <see cref="Operations"/> that starts at the given bit.</summary>
+        private readonly int ReadField(int shift)
+        {
+            return (Operations >> shift) & 0xF;
+        }
+
+        /// <summary>
+        /// Replaces the 4-bit field of <see cref="Operations"/> that starts at the given bit.
+        /// The new bits are not masked, matching the behaviour of a plain shift-and-or.
+        /// </summary>
+        private void WriteField(int shift, int bits)
+        {
+            int cleared = Operations & ~(0xF << shift);
+
+            Operations = (ushort)(cleared | (bits << shift));
+        }
+    }
+
+
+    /// <summary>
+    /// 24-byte key describing a depth/stencil state. The named fields are overlaid with raw
+    /// integer/vector aliases so that equality and hashing can work on the raw bytes.
+    /// </summary>
+    [StructLayout(LayoutKind.Explicit, Size = 24)]
+    internal struct DepthStencilUid : IEquatable<DepthStencilUid>
+    {
+        // Bytes 0-9: front face stencil state (StencilUid is 10 bytes with Pack = 1).
+        [FieldOffset(0)]
+        public StencilUid FrontFace;
+
+        // Bytes 10-11: packed depth state, see the properties below for the bit layout.
+        [FieldOffset(10)]
+        public ushort DepthState;
+
+        // Bytes 12-21: back face stencil state.
+        [FieldOffset(12)]
+        public StencilUid BackFace;
+
+        // Bytes 22-23: never written, fills the struct up to 24 bytes.
+        [FieldOffset(22)]
+        private readonly ushort _padding;
+
+        // Quick access aliases
+        // _id0 covers bytes 0-7, _id1 bytes 8-15, _id2 bytes 16-23 and _id01 bytes 0-15.
+#pragma warning disable IDE0044 // Add readonly modifier
+        [FieldOffset(0)]
+        private ulong _id0;
+        [FieldOffset(8)]
+        private ulong _id1;
+        [FieldOffset(0)]
+        private Vector128<byte> _id01;
+        [FieldOffset(16)]
+        private ulong _id2;
+#pragma warning restore IDE0044 // Add readonly modifier
+
+        /// <summary>Depth compare function (bits 0-3 of <see cref="DepthState"/>).</summary>
+        public MTLCompareFunction DepthCompareFunction
+        {
+            readonly get => (MTLCompareFunction)((DepthState >> 0) & 0xF);
+            set => DepthState = (ushort)((DepthState & 0xFFF0) | ((int)value << 0));
+        }
+
+        /// <summary>Whether the stencil test is enabled (bit 4 of <see cref="DepthState"/>).</summary>
+        public bool StencilTestEnabled
+        {
+            readonly get => ((DepthState >> 4) & 0x1) != 0;
+            set => DepthState = (ushort)((DepthState & 0xFFEF) | ((value ? 1 : 0) << 4));
+        }
+
+        /// <summary>Whether depth writes are enabled (bit 15 of <see cref="DepthState"/>).</summary>
+        public bool DepthWriteEnabled
+        {
+            readonly get => ((DepthState >> 15) & 0x1) != 0;
+            set => DepthState = (ushort)((DepthState & 0x7FFF) | ((value ? 1 : 0) << 15));
+        }
+
+        /// <summary>Compares against another object; true only for an equal <see cref="DepthStencilUid"/>.</summary>
+        public readonly override bool Equals(object obj)
+        {
+            return obj is DepthStencilUid other && EqualsRef(ref other);
+        }
+
+        /// <summary>Compares all 24 bytes of both keys (bytes 0-15 as a vector, bytes 16-23 as an integer).</summary>
+        public readonly bool EqualsRef(ref DepthStencilUid other)
+        {
+            return _id01.Equals(other._id01) && _id2 == other._id2;
+        }
+
+        /// <summary>Compares all 24 bytes of both keys.</summary>
+        public readonly bool Equals(DepthStencilUid other)
+        {
+            return EqualsRef(ref other);
+        }
+
+        /// <summary>Computes a hash from the three 64-bit words of the key.</summary>
+        public readonly override int GetHashCode()
+        {
+            ulong hash64 = _id0 * 23 ^
+                           _id1 * 23 ^
+                           _id2 * 23;
+
+            // Fold the 64-bit value into 32 bits.
+            return (int)hash64 ^ ((int)(hash64 >> 32) * 17);
+        }
+    }
+}

+ 341 - 0
src/Ryujinx.Graphics.Metal/State/PipelineState.cs

@@ -0,0 +1,341 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Foundation;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    [SupportedOSPlatform("macos")]
+    struct PipelineState
+    {
+        public PipelineUid Internal;
+
+        /// <summary>
+        /// Stages count, stored in bits 0-7 of <c>Internal.Id0</c>.
+        /// Only the low 8 bits of an assigned value are kept, so an out-of-range value
+        /// cannot overwrite the neighbouring fields of the pipeline key.
+        /// </summary>
+        public uint StagesCount
+        {
+            readonly get => (byte)((Internal.Id0 >> 0) & 0xFF);
+            set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFFFFFFFFF00) | (((ulong)value & 0xFF) << 0);
+        }
+
+        /// <summary>
+        /// Vertex attribute descriptions count, stored in bits 8-15 of <c>Internal.Id0</c>.
+        /// Only the low 8 bits of an assigned value are kept.
+        /// </summary>
+        public uint VertexAttributeDescriptionsCount
+        {
+            readonly get => (byte)((Internal.Id0 >> 8) & 0xFF);
+            set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFFFFFFF00FF) | (((ulong)value & 0xFF) << 8);
+        }
+
+        /// <summary>
+        /// Vertex binding descriptions count, stored in bits 16-23 of <c>Internal.Id0</c>.
+        /// Only the low 8 bits of an assigned value are kept.
+        /// </summary>
+        public uint VertexBindingDescriptionsCount
+        {
+            readonly get => (byte)((Internal.Id0 >> 16) & 0xFF);
+            set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFFFFF00FFFF) | (((ulong)value & 0xFF) << 16);
+        }
+
+        /// <summary>
+        /// Color blend attachment state count, stored in bits 24-31 of <c>Internal.Id0</c>.
+        /// Only the low 8 bits of an assigned value are kept.
+        /// </summary>
+        public uint ColorBlendAttachmentStateCount
+        {
+            readonly get => (byte)((Internal.Id0 >> 24) & 0xFF);
+            set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFFF00FFFFFF) | (((ulong)value & 0xFF) << 24);
+        }
+
+        /*
+         * Can be an input to a pipeline, but not sure what the situation for that is.
+        public PrimitiveTopology Topology
+        {
+            readonly get => (PrimitiveTopology)((Internal.Id6 >> 16) & 0xF);
+            set => Internal.Id6 = (Internal.Id6 & 0xFFFFFFFFFFF0FFFF) | ((ulong)value << 16);
+        }
+        */
+
+        /// <summary>
+        /// Logic operation, stored in bits 32-35 of <c>Internal.Id0</c>.
+        /// </summary>
+        public MTLLogicOperation LogicOp
+        {
+            readonly get
+            {
+                ulong field = (Internal.Id0 >> 32) & 0xFUL;
+
+                return (MTLLogicOperation)field;
+            }
+            set
+            {
+                // Clear the 4-bit field, then insert the new value.
+                ulong others = Internal.Id0 & ~(0xFUL << 32);
+
+                Internal.Id0 = others | ((ulong)value << 32);
+            }
+        }
+
+        // NOTE(review): the original author marked this flag with a question mark; it is stored
+        // in the key, but its use by the pipeline creation code still needs to be confirmed.
+        /// <summary>Primitive restart flag, stored in bit 36 of <c>Internal.Id0</c>.</summary>
+        public bool PrimitiveRestartEnable
+        {
+            readonly get => (Internal.Id0 & (1UL << 36)) != 0UL;
+            set => Internal.Id0 = value ? Internal.Id0 | (1UL << 36) : Internal.Id0 & ~(1UL << 36);
+        }
+
+        /// <summary>Rasterizer discard flag, stored in bit 37 of <c>Internal.Id0</c>.</summary>
+        public bool RasterizerDiscardEnable
+        {
+            readonly get => (Internal.Id0 & (1UL << 37)) != 0UL;
+            set => Internal.Id0 = value ? Internal.Id0 | (1UL << 37) : Internal.Id0 & ~(1UL << 37);
+        }
+
+        /// <summary>Logic operation enable flag, stored in bit 38 of <c>Internal.Id0</c>.</summary>
+        public bool LogicOpEnable
+        {
+            readonly get => (Internal.Id0 & (1UL << 38)) != 0UL;
+            set => Internal.Id0 = value ? Internal.Id0 | (1UL << 38) : Internal.Id0 & ~(1UL << 38);
+        }
+
+        /// <summary>Alpha to coverage flag, stored in bit 40 of <c>Internal.Id0</c>.</summary>
+        public bool AlphaToCoverageEnable
+        {
+            readonly get => (Internal.Id0 & (1UL << 40)) != 0UL;
+            set => Internal.Id0 = value ? Internal.Id0 | (1UL << 40) : Internal.Id0 & ~(1UL << 40);
+        }
+
+        /// <summary>Alpha to one flag, stored in bit 41 of <c>Internal.Id0</c>.</summary>
+        public bool AlphaToOneEnable
+        {
+            readonly get => (Internal.Id0 & (1UL << 41)) != 0UL;
+            set => Internal.Id0 = value ? Internal.Id0 | (1UL << 41) : Internal.Id0 & ~(1UL << 41);
+        }
+
+        /// <summary>
+        /// Pixel format of the depth/stencil attachment, stored in bits 48-63 of <c>Internal.Id0</c>.
+        /// </summary>
+        public MTLPixelFormat DepthStencilFormat
+        {
+            readonly get
+            {
+                ulong field = Internal.Id0 >> 48;
+
+                return (MTLPixelFormat)field;
+            }
+            set
+            {
+                // Keep the low 48 bits, replace the top 16.
+                ulong others = Internal.Id0 & 0x0000FFFFFFFFFFFF;
+
+                Internal.Id0 = others | ((ulong)value << 48);
+            }
+        }
+
+        // Not sure how to appropriately use this, but it does need to be passed for tess.
+        /// <summary>Patch control points, stored in the low 32 bits of <c>Internal.Id1</c>.</summary>
+        public uint PatchControlPoints
+        {
+            readonly get => (uint)(Internal.Id1 & uint.MaxValue);
+            set => Internal.Id1 = (Internal.Id1 & ~(ulong)uint.MaxValue) | value;
+        }
+
+        /// <summary>Samples count, stored in the high 32 bits of <c>Internal.Id1</c>.</summary>
+        public uint SamplesCount
+        {
+            readonly get => (uint)((Internal.Id1 >> 32) & uint.MaxValue);
+            set => Internal.Id1 = (Internal.Id1 & uint.MaxValue) | ((ulong)value << 32);
+        }
+
+        // Advanced blend not supported
+
+        /// <summary>
+        /// Copies the pixel format, blend settings and write mask of a blend state
+        /// into a pipeline color attachment descriptor.
+        /// </summary>
+        /// <param name="descriptor">Color attachment descriptor to fill</param>
+        /// <param name="blendState">Blend state to read the settings from</param>
+        private readonly void BuildColorAttachment(MTLRenderPipelineColorAttachmentDescriptor descriptor, ColorBlendStateUid blendState)
+        {
+            descriptor.PixelFormat = blendState.PixelFormat;
+            descriptor.WriteMask = blendState.WriteMask;
+            descriptor.SetBlendingEnabled(blendState.Enable);
+
+            // RGB blend equation.
+            descriptor.RgbBlendOperation = blendState.RgbBlendOperation;
+            descriptor.SourceRGBBlendFactor = blendState.SourceRGBBlendFactor;
+            descriptor.DestinationRGBBlendFactor = blendState.DestinationRGBBlendFactor;
+
+            // Alpha blend equation.
+            descriptor.AlphaBlendOperation = blendState.AlphaBlendOperation;
+            descriptor.SourceAlphaBlendFactor = blendState.SourceAlphaBlendFactor;
+            descriptor.DestinationAlphaBlendFactor = blendState.DestinationAlphaBlendFactor;
+        }
+
+        /// <summary>
+        /// Builds a vertex descriptor from the vertex attributes and vertex bindings
+        /// stored in the pipeline key.
+        /// </summary>
+        /// <returns>The new vertex descriptor</returns>
+        private readonly MTLVertexDescriptor BuildVertexDescriptor()
+        {
+            var descriptor = new MTLVertexDescriptor();
+
+            uint attributeCount = VertexAttributeDescriptionsCount;
+            uint bindingCount = VertexBindingDescriptionsCount;
+
+            for (int index = 0; index < attributeCount; index++)
+            {
+                VertexInputAttributeUid source = Internal.VertexAttributes[index];
+                var target = descriptor.Attributes.Object((ulong)index);
+
+                target.Format = source.Format;
+                target.Offset = source.Offset;
+                target.BufferIndex = source.BufferIndex;
+            }
+
+            for (int index = 0; index < bindingCount; index++)
+            {
+                VertexInputLayoutUid source = Internal.VertexBindings[index];
+                var target = descriptor.Layouts.Object((ulong)index);
+
+                target.StepFunction = source.StepFunction;
+                target.StepRate = source.StepRate;
+                target.Stride = source.Stride;
+            }
+
+            return descriptor;
+        }
+
+        /// <summary>
+        /// Creates a render pipeline descriptor from the state stored in the pipeline key
+        /// and the shader functions of the given program.
+        /// </summary>
+        /// <param name="program">Program that provides the vertex and, if present, fragment function</param>
+        /// <returns>The new render pipeline descriptor</returns>
+        private MTLRenderPipelineDescriptor CreateRenderDescriptor(Program program)
+        {
+            var descriptor = new MTLRenderPipelineDescriptor();
+
+            // Color attachments: slots with an invalid pixel format are left untouched.
+            for (int slot = 0; slot < Constants.MaxColorAttachments; slot++)
+            {
+                var blend = Internal.ColorBlendState[slot];
+
+                if (blend.PixelFormat == MTLPixelFormat.Invalid)
+                {
+                    continue;
+                }
+
+                BuildColorAttachment(descriptor.ColorAttachments.Object((ulong)slot), blend);
+            }
+
+            // Depth/stencil attachment formats depend on which aspects the format contains.
+            MTLPixelFormat dsFormat = DepthStencilFormat;
+            switch (dsFormat)
+            {
+                // No depth/stencil attachment
+                case MTLPixelFormat.Invalid:
+                    break;
+
+                // Depth Only Attachment
+                case MTLPixelFormat.Depth16Unorm:
+                case MTLPixelFormat.Depth32Float:
+                    descriptor.DepthAttachmentPixelFormat = dsFormat;
+                    break;
+
+                // Stencil Only Attachment
+                case MTLPixelFormat.Stencil8:
+                    descriptor.StencilAttachmentPixelFormat = dsFormat;
+                    break;
+
+                // Combined Attachment
+                case MTLPixelFormat.Depth24UnormStencil8:
+                case MTLPixelFormat.Depth32FloatStencil8:
+                    descriptor.DepthAttachmentPixelFormat = dsFormat;
+                    descriptor.StencilAttachmentPixelFormat = dsFormat;
+                    break;
+
+                default:
+                    Logger.Error?.PrintMsg(LogClass.Gpu, $"Unsupported Depth/Stencil Format: {dsFormat}!");
+                    break;
+            }
+
+            descriptor.LogicOperationEnabled = LogicOpEnable;
+            descriptor.LogicOperation = LogicOp;
+            descriptor.AlphaToCoverageEnabled = AlphaToCoverageEnable;
+            descriptor.AlphaToOneEnabled = AlphaToOneEnable;
+            descriptor.RasterizationEnabled = !RasterizerDiscardEnable;
+            // A samples count of zero is treated as single sampled.
+            descriptor.SampleCount = Math.Max(1, SamplesCount);
+
+            descriptor.VertexDescriptor = BuildVertexDescriptor();
+            descriptor.VertexFunction = program.VertexFunction;
+
+            // The fragment function is optional.
+            if (program.FragmentFunction.NativePtr != 0)
+            {
+                descriptor.FragmentFunction = program.FragmentFunction;
+            }
+
+            return descriptor;
+        }
+
+        /// <summary>
+        /// Returns the render pipeline state for the current state, creating and caching it on the program if needed.
+        /// </summary>
+        /// <param name="device">Device used to create the pipeline state</param>
+        /// <param name="program">Program that owns the pipeline cache and supplies the shader functions</param>
+        /// <returns>The cached or newly created pipeline state</returns>
+        public MTLRenderPipelineState CreateRenderPipeline(MTLDevice device, Program program)
+        {
+            if (!program.TryGetGraphicsPipeline(ref Internal, out var pipelineState))
+            {
+                using var renderDescriptor = CreateRenderDescriptor(program);
+
+                var creationError = new NSError(IntPtr.Zero);
+                pipelineState = device.NewRenderPipelineState(renderDescriptor, ref creationError);
+
+                if (creationError != IntPtr.Zero)
+                {
+                    Logger.Error?.PrintMsg(LogClass.Gpu, $"Failed to create Render Pipeline State: {StringHelper.String(creationError.LocalizedDescription)}");
+                }
+
+                // The result is stored even when an error was reported.
+                program.AddGraphicsPipeline(ref Internal, pipelineState);
+            }
+
+            return pipelineState;
+        }
+
+        /// <summary>
+        /// Builds a compute pipeline descriptor for the given program.
+        /// </summary>
+        /// <param name="program">Program supplying the compute function and its local size</param>
+        /// <returns>The populated compute pipeline descriptor</returns>
+        /// <exception cref="InvalidOperationException">The product of the local size dimensions is 0</exception>
+        public static MTLComputePipelineDescriptor CreateComputeDescriptor(Program program)
+        {
+            ComputeSize size = program.ComputeLocalSize;
+
+            // Total threads per threadgroup is the product of the three local dimensions.
+            uint threadCount = (uint)(size.X * size.Y * size.Z);
+
+            if (threadCount == 0)
+            {
+                throw new InvalidOperationException("Local thread size for compute cannot be 0 in any dimension.");
+            }
+
+            return new MTLComputePipelineDescriptor
+            {
+                ComputeFunction = program.ComputeFunction,
+                MaxTotalThreadsPerThreadgroup = threadCount,
+                ThreadGroupSizeIsMultipleOfThreadExecutionWidth = true,
+            };
+        }
+
+        /// <summary>
+        /// Returns the compute pipeline state of the program, creating and caching it on the program if needed.
+        /// </summary>
+        /// <param name="device">Device used to create the pipeline state</param>
+        /// <param name="program">Program that owns the cached pipeline and supplies the compute function</param>
+        /// <returns>The cached or newly created pipeline state</returns>
+        public static MTLComputePipelineState CreateComputePipeline(MTLDevice device, Program program)
+        {
+            if (!program.TryGetComputePipeline(out var pipelineState))
+            {
+                using MTLComputePipelineDescriptor computeDescriptor = CreateComputeDescriptor(program);
+
+                var creationError = new NSError(IntPtr.Zero);
+                pipelineState = device.NewComputePipelineState(computeDescriptor, MTLPipelineOption.None, 0, ref creationError);
+
+                if (creationError != IntPtr.Zero)
+                {
+                    Logger.Error?.PrintMsg(LogClass.Gpu, $"Failed to create Compute Pipeline State: {StringHelper.String(creationError.LocalizedDescription)}");
+                }
+
+                // The result is stored even when an error was reported.
+                program.AddComputePipeline(pipelineState);
+            }
+
+            return pipelineState;
+        }
+
+        /// <summary>
+        /// Puts the state into its initial configuration: a sample count of 1 and a freshly reset color blend state.
+        /// </summary>
+        public void Initialize()
+        {
+            SamplesCount = 1;
+
+            Internal.ResetColorState();
+        }
+
+        /*
+         * TODO, this is from vulkan.
+
+        private void UpdateVertexAttributeDescriptions(VulkanRenderer gd)
+        {
+            // Vertex attributes exceeding the stride are invalid.
+            // In metal, they cause glitches with the vertex shader fetching incorrect values.
+            // To work around this, we reduce the format to something that doesn't exceed the stride if possible.
+            // The assumption is that the exceeding components are not actually accessed on the shader.
+
+            for (int index = 0; index < VertexAttributeDescriptionsCount; index++)
+            {
+                var attribute = Internal.VertexAttributeDescriptions[index];
+                int vbIndex = GetVertexBufferIndex(attribute.Binding);
+
+                if (vbIndex >= 0)
+                {
+                    ref var vb = ref Internal.VertexBindingDescriptions[vbIndex];
+
+                    Format format = attribute.Format;
+
+                    while (vb.Stride != 0 && attribute.Offset + FormatTable.GetAttributeFormatSize(format) > vb.Stride)
+                    {
+                        Format newFormat = FormatTable.DropLastComponent(format);
+
+                        if (newFormat == format)
+                        {
+                            // That case means we failed to find a format that fits within the stride,
+                            // so just restore the original format and give up.
+                            format = attribute.Format;
+                            break;
+                        }
+
+                        format = newFormat;
+                    }
+
+                    if (attribute.Format != format && gd.FormatCapabilities.BufferFormatSupports(FormatFeatureFlags.VertexBufferBit, format))
+                    {
+                        attribute.Format = format;
+                    }
+                }
+
+                _vertexAttributeDescriptions2[index] = attribute;
+            }
+        }
+
+        private int GetVertexBufferIndex(uint binding)
+        {
+            for (int index = 0; index < VertexBindingDescriptionsCount; index++)
+            {
+                if (Internal.VertexBindingDescriptions[index].Binding == binding)
+                {
+                    return index;
+                }
+            }
+
+            return -1;
+        }
+        */
+    }
+}

+ 208 - 0
src/Ryujinx.Graphics.Metal/State/PipelineUid.cs

@@ -0,0 +1,208 @@
+using Ryujinx.Common.Memory;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Vertex attribute description packed into a single 64-bit value.
+    /// Layout: bits 0-31 offset, bits 32-47 format, bits 48-63 buffer index.
+    /// </summary>
+    struct VertexInputAttributeUid
+    {
+        public ulong Id0;
+
+        /// <summary>
+        /// Attribute offset, stored in the low 32 bits. Values that do not fit in 32 bits are truncated.
+        /// </summary>
+        public ulong Offset
+        {
+            readonly get => (uint)((Id0 >> 0) & 0xFFFFFFFF);
+            // Mask the value so an oversized offset cannot overwrite the format and buffer index bits.
+            set => Id0 = (Id0 & 0xFFFFFFFF00000000) | ((value & 0xFFFFFFFF) << 0);
+        }
+
+        /// <summary>
+        /// Attribute format, stored in bits 32-47.
+        /// </summary>
+        public MTLVertexFormat Format
+        {
+            readonly get => (MTLVertexFormat)((Id0 >> 32) & 0xFFFF);
+            // Mask the value so it cannot spill into the buffer index bits.
+            set => Id0 = (Id0 & 0xFFFF0000FFFFFFFF) | (((ulong)value & 0xFFFF) << 32);
+        }
+
+        /// <summary>
+        /// Index of the buffer the attribute reads from, stored in bits 48-63.
+        /// </summary>
+        public ulong BufferIndex
+        {
+            readonly get => ((Id0 >> 48) & 0xFFFF);
+            // Bits above the 16-bit field are shifted out of the 64-bit value, so no mask is needed.
+            set => Id0 = (Id0 & 0x0000FFFFFFFFFFFF) | ((ulong)value << 48);
+        }
+    }
+
+    /// <summary>
+    /// Vertex buffer layout description packed into a single 64-bit value.
+    /// Layout: bits 0-31 stride, bits 32-60 step rate, bits 61-63 step function.
+    /// </summary>
+    struct VertexInputLayoutUid
+    {
+        public ulong Id0;
+
+        /// <summary>
+        /// Stride, stored in the low 32 bits.
+        /// </summary>
+        public uint Stride
+        {
+            readonly get => (uint)((Id0 >> 0) & 0xFFFFFFFF);
+            set => Id0 = (Id0 & 0xFFFFFFFF00000000) | ((ulong)value << 0);
+        }
+
+        /// <summary>
+        /// Step rate, stored in 29 bits (32-60). Larger values are truncated to 29 bits.
+        /// </summary>
+        public uint StepRate
+        {
+            readonly get => (uint)((Id0 >> 32) & 0x1FFFFFFF);
+            // The field is only 29 bits wide; mask the value so its top 3 bits cannot corrupt the step function.
+            set => Id0 = (Id0 & 0xE0000000FFFFFFFF) | (((ulong)value & 0x1FFFFFFF) << 32);
+        }
+
+        /// <summary>
+        /// Step function, stored in the top 3 bits (61-63).
+        /// </summary>
+        public MTLVertexStepFunction StepFunction
+        {
+            readonly get => (MTLVertexStepFunction)((Id0 >> 61) & 0x7);
+            // Bits above the 3-bit field are shifted out of the 64-bit value, so no mask is needed.
+            set => Id0 = (Id0 & 0x1FFFFFFFFFFFFFFF) | ((ulong)value << 61);
+        }
+    }
+
+    /// <summary>
+    /// Color attachment blend state packed into a single 64-bit value.
+    /// Layout: bits 0-15 pixel format, 16-23 source RGB factor, 24-31 destination RGB factor,
+    /// 32-35 RGB operation, 36-39 alpha operation, 40-47 source alpha factor,
+    /// 48-55 destination alpha factor, 56-59 write mask, 63 enable.
+    /// </summary>
+    struct ColorBlendStateUid
+    {
+        public ulong Id0;
+
+        // Reads the field of the given width mask located at the given bit position.
+        private readonly ulong ReadBits(int shift, ulong mask) => (Id0 >> shift) & mask;
+
+        // Clears the field at the given position and ORs the new value in its place.
+        private void WriteBits(int shift, ulong mask, ulong bits) => Id0 = (Id0 & ~(mask << shift)) | (bits << shift);
+
+        public MTLPixelFormat PixelFormat
+        {
+            readonly get => (MTLPixelFormat)ReadBits(0, 0xFFFF);
+            set => WriteBits(0, 0xFFFF, (ulong)value);
+        }
+
+        public MTLBlendFactor SourceRGBBlendFactor
+        {
+            readonly get => (MTLBlendFactor)ReadBits(16, 0xFF);
+            set => WriteBits(16, 0xFF, (ulong)value);
+        }
+
+        public MTLBlendFactor DestinationRGBBlendFactor
+        {
+            readonly get => (MTLBlendFactor)ReadBits(24, 0xFF);
+            set => WriteBits(24, 0xFF, (ulong)value);
+        }
+
+        public MTLBlendOperation RgbBlendOperation
+        {
+            readonly get => (MTLBlendOperation)ReadBits(32, 0xF);
+            set => WriteBits(32, 0xF, (ulong)value);
+        }
+
+        public MTLBlendOperation AlphaBlendOperation
+        {
+            readonly get => (MTLBlendOperation)ReadBits(36, 0xF);
+            set => WriteBits(36, 0xF, (ulong)value);
+        }
+
+        public MTLBlendFactor SourceAlphaBlendFactor
+        {
+            readonly get => (MTLBlendFactor)ReadBits(40, 0xFF);
+            set => WriteBits(40, 0xFF, (ulong)value);
+        }
+
+        public MTLBlendFactor DestinationAlphaBlendFactor
+        {
+            readonly get => (MTLBlendFactor)ReadBits(48, 0xFF);
+            set => WriteBits(48, 0xFF, (ulong)value);
+        }
+
+        public MTLColorWriteMask WriteMask
+        {
+            readonly get => (MTLColorWriteMask)ReadBits(56, 0xF);
+            set => WriteBits(56, 0xF, (ulong)value);
+        }
+
+        public bool Enable
+        {
+            readonly get => ReadBits(63, 0x1) != 0UL;
+            set => WriteBits(63, 0x1, value ? 1UL : 0UL);
+        }
+
+        /// <summary>
+        /// Replaces this state with <paramref name="uid"/> while keeping the current pixel format.
+        /// </summary>
+        /// <param name="uid">State to copy everything except the pixel format from</param>
+        public void Swap(ColorBlendStateUid uid)
+        {
+            MTLPixelFormat keptFormat = PixelFormat;
+
+            Id0 = uid.Id0;
+            PixelFormat = keptFormat;
+        }
+    }
+
+    /// <summary>
+    /// Packed pipeline state used as a key for pipeline lookup, with by-reference equality and hashing.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    struct PipelineUid : IRefEquatable<PipelineUid>
+    {
+        public ulong Id0;
+        public ulong Id1;
+
+        // Element counts read from Id0; they bound how many array entries take part in equality and hashing.
+        private readonly uint VertexAttributeDescriptionsCount => (byte)((Id0 >> 8) & 0xFF);
+        private readonly uint VertexBindingDescriptionsCount => (byte)((Id0 >> 16) & 0xFF);
+        private readonly uint ColorBlendAttachmentStateCount => (byte)((Id0 >> 24) & 0xFF);
+
+        public Array32<VertexInputAttributeUid> VertexAttributes;
+        public Array33<VertexInputLayoutUid> VertexBindings;
+        public Array8<ColorBlendStateUid> ColorBlendState;
+        public uint AttachmentIntegerFormatMask;
+        public bool LogicOpsAllowed;
+
+        /// <summary>
+        /// Clears every color blend state entry and sets its write mask to all channels.
+        /// </summary>
+        public void ResetColorState()
+        {
+            ColorBlendState = new();
+
+            for (int i = 0; i < ColorBlendState.Length; i++)
+            {
+                ColorBlendState[i].WriteMask = MTLColorWriteMask.All;
+            }
+        }
+
+        /// <summary>
+        /// Compares this value with a boxed <see cref="PipelineUid"/>.
+        /// </summary>
+        /// <param name="obj">Object to compare with</param>
+        /// <returns>True if <paramref name="obj"/> is a <see cref="PipelineUid"/> equal to this one</returns>
+        public readonly override bool Equals(object obj)
+        {
+            if (obj is PipelineUid other)
+            {
+                // The call must pass the argument by ref. Without it, overload resolution selects
+                // Equals(object) again (boxing the argument) and the method recurses until the stack overflows.
+                // Equals(ref) is not a readonly member, so it is invoked on an explicit copy.
+                PipelineUid self = this;
+
+                return self.Equals(ref other);
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Compares the packed ids and the used portion of each state array.
+        /// </summary>
+        /// <param name="other">Value to compare with</param>
+        /// <returns>True if both values describe the same state</returns>
+        public bool Equals(ref PipelineUid other)
+        {
+            // Compares the 16 bytes starting at Id0 in one go; this relies on Id1 directly following Id0 in memory.
+            if (!Unsafe.As<ulong, Vector128<byte>>(ref Id0).Equals(Unsafe.As<ulong, Vector128<byte>>(ref other.Id0)))
+            {
+                return false;
+            }
+
+            // The counts live in Id0, which is known to match at this point, so this instance's counts apply to both.
+            if (!SequenceEqual<VertexInputAttributeUid>(VertexAttributes.AsSpan(), other.VertexAttributes.AsSpan(), VertexAttributeDescriptionsCount))
+            {
+                return false;
+            }
+
+            if (!SequenceEqual<VertexInputLayoutUid>(VertexBindings.AsSpan(), other.VertexBindings.AsSpan(), VertexBindingDescriptionsCount))
+            {
+                return false;
+            }
+
+            if (!SequenceEqual<ColorBlendStateUid>(ColorBlendState.AsSpan(), other.ColorBlendState.AsSpan(), ColorBlendAttachmentStateCount))
+            {
+                return false;
+            }
+
+            return true;
+        }
+
+        // Byte-wise comparison of the first `count` elements of both spans.
+        private static bool SequenceEqual<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, uint count) where T : unmanaged
+        {
+            return MemoryMarshal.Cast<T, byte>(x[..(int)count]).SequenceEqual(MemoryMarshal.Cast<T, byte>(y[..(int)count]));
+        }
+
+        /// <summary>
+        /// Hashes the packed ids together with the used portion of each state array.
+        /// </summary>
+        /// <returns>Hash code for this value</returns>
+        public override int GetHashCode()
+        {
+            ulong hash64 = Id0 * 23 ^
+                           Id1 * 23;
+
+            for (int i = 0; i < (int)VertexAttributeDescriptionsCount; i++)
+            {
+                hash64 ^= VertexAttributes[i].Id0 * 23;
+            }
+
+            for (int i = 0; i < (int)VertexBindingDescriptionsCount; i++)
+            {
+                hash64 ^= VertexBindings[i].Id0 * 23;
+            }
+
+            for (int i = 0; i < (int)ColorBlendAttachmentStateCount; i++)
+            {
+                hash64 ^= ColorBlendState[i].Id0 * 23;
+            }
+
+            // Fold the 64-bit hash down to 32 bits.
+            return (int)hash64 ^ ((int)(hash64 >> 32) * 17);
+        }
+    }
+}

+ 42 - 0
src/Ryujinx.Graphics.Metal/StateCache.cs

@@ -0,0 +1,42 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Cache of disposable values keyed by a hash derived from the descriptor used to create them.
+    /// </summary>
+    /// <typeparam name="T">Type of the cached value</typeparam>
+    /// <typeparam name="TDescriptor">Type of the descriptor a value is created from</typeparam>
+    /// <typeparam name="THash">Type of the dictionary key derived from the descriptor</typeparam>
+    [SupportedOSPlatform("macos")]
+    abstract class StateCache<T, TDescriptor, THash> : IDisposable where T : IDisposable
+    {
+        private readonly Dictionary<THash, T> _cache = new();
+
+        /// <summary>
+        /// Computes the cache key for a descriptor.
+        /// </summary>
+        protected abstract THash GetHash(TDescriptor descriptor);
+
+        /// <summary>
+        /// Creates a new value for a descriptor that has no cached value yet.
+        /// </summary>
+        protected abstract T CreateValue(TDescriptor descriptor);
+
+        /// <summary>
+        /// Disposes every cached value and empties the cache.
+        /// </summary>
+        public void Dispose()
+        {
+            foreach (T value in _cache.Values)
+            {
+                value.Dispose();
+            }
+
+            // Drop the disposed values so a repeated Dispose does not dispose them again
+            // and a later GetOrCreate cannot hand out a disposed value.
+            _cache.Clear();
+
+            GC.SuppressFinalize(this);
+        }
+
+        /// <summary>
+        /// Returns the cached value for the descriptor, creating and caching it on first use.
+        /// </summary>
+        /// <param name="descriptor">Descriptor identifying the value</param>
+        /// <returns>The cached or newly created value</returns>
+        public T GetOrCreate(TDescriptor descriptor)
+        {
+            var hash = GetHash(descriptor);
+            if (_cache.TryGetValue(hash, out T value))
+            {
+                return value;
+            }
+            else
+            {
+                var newValue = CreateValue(descriptor);
+                _cache.Add(hash, newValue);
+
+                return newValue;
+            }
+        }
+    }
+}

+ 30 - 0
src/Ryujinx.Graphics.Metal/StringHelper.cs

@@ -0,0 +1,30 @@
+using SharpMetal.Foundation;
+using SharpMetal.ObjectiveCCore;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Conversions between managed strings and <see cref="SharpMetal.Foundation.NSString"/>.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class StringHelper
+    {
+        /// <summary>
+        /// Creates an NSString from a managed string via <c>stringWithUTF8String:</c>.
+        /// </summary>
+        /// <param name="source">Managed string to convert</param>
+        /// <returns>The NSString wrapping the returned native object</returns>
+        public static NSString NSString(string source)
+        {
+            return new(ObjectiveC.IntPtr_objc_msgSend(new ObjectiveCClass("NSString"), "stringWithUTF8String:", source));
+        }
+
+        /// <summary>
+        /// Copies the contents of an NSString into a managed string using its UTF-16 representation.
+        /// </summary>
+        /// <param name="source">NSString to convert</param>
+        /// <returns>A managed string with the same number of UTF-16 code units as <paramref name="source"/></returns>
+        public static unsafe string String(NSString source)
+        {
+            int length = (int)source.Length;
+
+            // getCString:maxLength:encoding: also writes a terminator, and the maximum length passed below
+            // covers one byte more than the string itself. Reserve one extra element so that write stays
+            // inside the buffer. This also keeps the array non-empty, so the pinned pointer is never null.
+            char[] sourceBuffer = new char[length + 1];
+            fixed (char* pSourceBuffer = sourceBuffer)
+            {
+                ObjectiveC.bool_objc_msgSend(source,
+                    "getCString:maxLength:encoding:",
+                    pSourceBuffer,
+                    source.MaximumLengthOfBytes(NSStringEncoding.UTF16) + 1,
+                    (ulong)NSStringEncoding.UTF16);
+            }
+
+            // Exclude the terminator slot from the managed string.
+            return new string(sourceBuffer, 0, length);
+        }
+    }
+}

+ 214 - 0
src/Ryujinx.Graphics.Metal/SyncManager.cs

@@ -0,0 +1,214 @@
+using Ryujinx.Common.Logging;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    [SupportedOSPlatform("macos")]
+    class SyncManager
+    {
+        /// <summary>
+        /// Bookkeeping for a single sync object.
+        /// </summary>
+        private class SyncHandle
+        {
+            public ulong ID;
+            public MultiFenceHolder Waitable;
+            public ulong FlushId;
+            public bool Signalled;
+
+            /// <summary>
+            /// Tells whether this handle was created at or after the flush identified by <paramref name="currentFlushId"/>.
+            /// The difference is interpreted as a signed value, so the comparison tolerates counter wrap-around.
+            /// </summary>
+            public bool NeedsFlush(ulong currentFlushId) => (long)(FlushId - currentFlushId) >= 0;
+        }
+
+        private ulong _firstHandle;
+
+        private readonly MetalRenderer _renderer;
+        private readonly List<SyncHandle> _handles;
+        private ulong _flushId;
+        private long _waitTicks;
+
+        /// <summary>
+        /// Creates a sync manager bound to the given renderer, with an empty handle list.
+        /// </summary>
+        /// <param name="renderer">Renderer used to flush commands and register waitables</param>
+        public SyncManager(MetalRenderer renderer)
+        {
+            _handles = new();
+            _renderer = renderer;
+        }
+
+        /// <summary>
+        /// Advances the flush counter by one.
+        /// </summary>
+        public void RegisterFlush()
+        {
+            _flushId += 1;
+        }
+
+        /// <summary>
+        /// Creates a sync handle with the given id and appends it to the handle list.
+        /// </summary>
+        /// <param name="id">Id of the new sync object</param>
+        /// <param name="strict">When true, all commands are flushed before the waitable is registered</param>
+        public void Create(ulong id, bool strict)
+        {
+            // The flush id is sampled before any flush triggered below.
+            ulong flushIdAtCreation = _flushId;
+            MultiFenceHolder fenceHolder = new();
+
+            bool deferFlush = !strict && _renderer.InterruptAction != null;
+
+            if (deferFlush)
+            {
+                // Don't flush commands, instead wait for the current command buffer to finish.
+                // If this sync is waited on before the command buffer is submitted, interrupt the gpu thread and flush it manually.
+
+                _renderer.CommandBufferPool.AddInUseWaitable(fenceHolder);
+            }
+            else
+            {
+                _renderer.FlushAllCommands();
+                _renderer.CommandBufferPool.AddWaitable(fenceHolder);
+            }
+
+            var newHandle = new SyncHandle
+            {
+                ID = id,
+                Waitable = fenceHolder,
+                FlushId = flushIdAtCreation,
+            };
+
+            lock (_handles)
+            {
+                _handles.Add(newHandle);
+            }
+        }
+
+        /// <summary>
+        /// Finds the highest id among the tracked handles that are signalled, marking newly signalled handles on the way.
+        /// </summary>
+        /// <returns>The highest signalled id found, or the first handle id if none qualifies</returns>
+        public ulong GetCurrent()
+        {
+            lock (_handles)
+            {
+                ulong current = _firstHandle;
+
+                foreach (SyncHandle entry in _handles)
+                {
+                    lock (entry)
+                    {
+                        // Skip handles that were already released, and ids that would not raise the result.
+                        if (entry.Waitable != null && entry.ID > current)
+                        {
+                            // Non-blocking poll, only performed when the handle is not already flagged.
+                            if (entry.Signalled || entry.Waitable.WaitForFences(false))
+                            {
+                                current = entry.ID;
+                                entry.Signalled = true;
+                            }
+                        }
+                    }
+                }
+
+                return current;
+            }
+        }
+
+        /// <summary>
+        /// Waits for the sync object with the given id, if it is still tracked.
+        /// Time spent in a successful wait is added to the accumulated wait ticks.
+        /// </summary>
+        /// <param name="id">Id of the sync object to wait on</param>
+        public void Wait(ulong id)
+        {
+            SyncHandle result = null;
+
+            // Look the handle up under the list lock; the wait itself happens outside of it.
+            lock (_handles)
+            {
+                if ((long)(_firstHandle - id) > 0)
+                {
+                    return; // The handle has already been signalled or deleted.
+                }
+
+                foreach (SyncHandle handle in _handles)
+                {
+                    if (handle.ID == id)
+                    {
+                        result = handle;
+                        break;
+                    }
+                }
+            }
+
+            if (result != null)
+            {
+                // Cleanup sets Waitable to null when it removes a handle.
+                if (result.Waitable == null)
+                {
+                    return;
+                }
+
+                long beforeTicks = Stopwatch.GetTimestamp();
+
+                if (result.NeedsFlush(_flushId))
+                {
+                    // NOTE(review): presumably InterruptAction runs the callback on the GPU thread - confirm against MetalRenderer.
+                    // The condition is checked again inside the callback before flushing.
+                    _renderer.InterruptAction(() =>
+                    {
+                        if (result.NeedsFlush(_flushId))
+                        {
+                            _renderer.FlushAllCommands();
+                        }
+                    });
+                }
+
+                lock (result)
+                {
+                    // Check again under the handle lock, since the handle may have been removed in the meantime.
+                    if (result.Waitable == null)
+                    {
+                        return;
+                    }
+
+                    bool signaled = result.Signalled || result.Waitable.WaitForFences(true);
+
+                    if (!signaled)
+                    {
+                        Logger.Error?.PrintMsg(LogClass.Gpu, $"Metal Sync Object {result.ID} failed to signal within 1000ms. Continuing...");
+                    }
+                    else
+                    {
+                        _waitTicks += Stopwatch.GetTimestamp() - beforeTicks;
+                        result.Signalled = true;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Removes handles from the front of the list for as long as the first one is signalled.
+        /// Stops at the first handle that still needs a flush or has not signalled yet.
+        /// </summary>
+        public void Cleanup()
+        {
+            // Iterate through handles and remove any that have already been signalled.
+
+            while (true)
+            {
+                SyncHandle first = null;
+                lock (_handles)
+                {
+                    first = _handles.FirstOrDefault();
+                }
+
+                if (first == null || first.NeedsFlush(_flushId))
+                {
+                    break;
+                }
+
+                // Non-blocking poll, done outside of both locks.
+                bool signaled = first.Waitable.WaitForFences(false);
+                if (signaled)
+                {
+                    // Delete the sync object.
+                    // The list lock is taken before the handle lock, matching the order used by GetCurrent.
+                    lock (_handles)
+                    {
+                        lock (first)
+                        {
+                            _firstHandle = first.ID + 1;
+                            _handles.RemoveAt(0);
+                            first.Waitable = null;
+                        }
+                    }
+                }
+                else
+                {
+                    // This sync handle and any following have not been reached yet.
+                    break;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns the wait ticks accumulated so far and resets the counter to zero.
+        /// </summary>
+        /// <returns>Accumulated wait ticks before the reset</returns>
+        public long GetAndResetWaitTicks()
+        {
+            long accumulated = _waitTicks;
+
+            _waitTicks = 0;
+
+            return accumulated;
+        }
+    }
+}

+ 654 - 0
src/Ryujinx.Graphics.Metal/Texture.cs

@@ -0,0 +1,654 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Foundation;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    [SupportedOSPlatform("macos")]
+    class Texture : TextureBase, ITexture
+    {
+        private MTLTexture _identitySwizzleHandle;
+        private readonly bool _identityIsDifferent;
+
+        /// <summary>
+        /// Creates a new texture with its own storage, plus a swizzled view of it when the requested swizzle is not the identity.
+        /// </summary>
+        /// <param name="device">Device used to allocate the texture</param>
+        /// <param name="renderer">Owning renderer</param>
+        /// <param name="pipeline">Pipeline used for texture operations</param>
+        /// <param name="info">Texture creation info</param>
+        public Texture(MTLDevice device, MetalRenderer renderer, Pipeline pipeline, TextureCreateInfo info) : base(device, renderer, pipeline, info)
+        {
+            MTLPixelFormat format = FormatTable.GetFormat(Info.Format);
+
+            var textureDescriptor = new MTLTextureDescriptor
+            {
+                PixelFormat = format,
+                Usage = MTLTextureUsage.Unknown,
+                SampleCount = (ulong)Info.Samples,
+                TextureType = Info.Target.Convert(),
+                Width = (ulong)Info.Width,
+                Height = (ulong)Info.Height,
+                MipmapLevelCount = (ulong)Info.Levels
+            };
+
+            switch (info.Target)
+            {
+                case Target.Texture3D:
+                    textureDescriptor.Depth = (ulong)Info.Depth;
+                    break;
+                case Target.Cubemap:
+                    // Neither depth nor array length is set for a single cubemap.
+                    break;
+                case Target.CubemapArray:
+                    // The depth counts faces; the array length counts cubes (6 faces each).
+                    textureDescriptor.ArrayLength = (ulong)(Info.Depth / 6);
+                    break;
+                default:
+                    textureDescriptor.ArrayLength = (ulong)Info.Depth;
+                    break;
+            }
+
+            MTLTextureSwizzleChannels swizzle = GetSwizzle(info, textureDescriptor.PixelFormat);
+
+            _identitySwizzleHandle = Device.NewTexture(textureDescriptor);
+
+            // A separate view is only needed when the swizzle actually remaps channels.
+            _identityIsDifferent = !SwizzleIsIdentity(swizzle);
+
+            MtlTexture = _identityIsDifferent
+                ? CreateDefaultView(_identitySwizzleHandle, swizzle, textureDescriptor)
+                : _identitySwizzleHandle;
+
+            MtlFormat = format;
+            textureDescriptor.Dispose();
+        }
+
+        /// <summary>
+        /// Creates a texture that is a view into a range of levels and slices of an existing texture.
+        /// </summary>
+        /// <param name="device">Device the texture belongs to</param>
+        /// <param name="renderer">Owning renderer</param>
+        /// <param name="pipeline">Pipeline used for texture operations</param>
+        /// <param name="info">Creation info describing the view</param>
+        /// <param name="sourceTexture">Texture the view is created from</param>
+        /// <param name="firstLayer">First slice of the source covered by the view</param>
+        /// <param name="firstLevel">First mipmap level of the source covered by the view</param>
+        public Texture(MTLDevice device, MetalRenderer renderer, Pipeline pipeline, TextureCreateInfo info, MTLTexture sourceTexture, int firstLayer, int firstLevel) : base(device, renderer, pipeline, info)
+        {
+            var viewFormat = FormatTable.GetFormat(Info.Format);
+
+            // Stencil views of combined depth/stencil formats use the matching stencil-only view format.
+            if (info.DepthStencilMode == DepthStencilMode.Stencil)
+            {
+                if (viewFormat == MTLPixelFormat.Depth32FloatStencil8)
+                {
+                    viewFormat = MTLPixelFormat.X32Stencil8;
+                }
+                else if (viewFormat == MTLPixelFormat.Depth24UnormStencil8)
+                {
+                    viewFormat = MTLPixelFormat.X24Stencil8;
+                }
+            }
+
+            var viewType = Info.Target.Convert();
+
+            var levelRange = new NSRange
+            {
+                location = (ulong)firstLevel,
+                length = (ulong)Info.Levels,
+            };
+
+            // 3D textures always use a single slice.
+            var sliceRange = new NSRange
+            {
+                location = (ulong)firstLayer,
+                length = viewType == MTLTextureType.Type3D ? 1 : (ulong)info.GetDepthOrLayers(),
+            };
+
+            var swizzle = GetSwizzle(info, viewFormat);
+
+            _identitySwizzleHandle = sourceTexture.NewTextureView(viewFormat, viewType, levelRange, sliceRange);
+
+            // A second, swizzled view is only created when the swizzle actually remaps channels.
+            _identityIsDifferent = !SwizzleIsIdentity(swizzle);
+
+            MtlTexture = _identityIsDifferent
+                ? sourceTexture.NewTextureView(viewFormat, viewType, levelRange, sliceRange, swizzle)
+                : _identitySwizzleHandle;
+
+            MtlFormat = viewFormat;
+            FirstLayer = firstLayer;
+            FirstLevel = firstLevel;
+        }
+
+        /// <summary>
+        /// Binds the identity-swizzle handle of this texture to the given color attachment descriptor.
+        /// </summary>
+        /// <param name="descriptor">Color attachment descriptor to populate</param>
+        public void PopulateRenderPassAttachment(MTLRenderPassColorAttachmentDescriptor descriptor)
+            => descriptor.Texture = _identitySwizzleHandle;
+
+        /// <summary>
+        /// Creates a view covering all levels and slices of a texture, with the given swizzle applied.
+        /// </summary>
+        /// <param name="texture">Texture to create the view from</param>
+        /// <param name="swizzle">Channel swizzle of the view</param>
+        /// <param name="descriptor">Descriptor supplying the pixel format and texture type of the view</param>
+        /// <returns>The swizzled view</returns>
+        private MTLTexture CreateDefaultView(MTLTexture texture, MTLTextureSwizzleChannels swizzle, MTLTextureDescriptor descriptor)
+        {
+            var levelRange = new NSRange
+            {
+                location = 0,
+                length = (ulong)Info.Levels,
+            };
+
+            // 3D textures always use a single slice.
+            var sliceRange = new NSRange
+            {
+                location = 0,
+                length = Info.Target == Target.Texture3D ? 1 : (ulong)Info.GetDepthOrLayers(),
+            };
+
+            return texture.NewTextureView(descriptor.PixelFormat, descriptor.TextureType, levelRange, sliceRange, swizzle);
+        }
+
+        /// <summary>
+        /// Checks whether a swizzle maps every channel onto itself.
+        /// </summary>
+        /// <param name="swizzle">Swizzle to check</param>
+        /// <returns>True if red, green, blue and alpha are all unchanged</returns>
+        private bool SwizzleIsIdentity(MTLTextureSwizzleChannels swizzle)
+        {
+            if (swizzle.red != MTLTextureSwizzle.Red || swizzle.green != MTLTextureSwizzle.Green)
+            {
+                return false;
+            }
+
+            return swizzle.blue == MTLTextureSwizzle.Blue && swizzle.alpha == MTLTextureSwizzle.Alpha;
+        }
+
+        /// <summary>
+        /// Computes the channel swizzle for the texture, adjusting it for formats whose channel order differs.
+        /// </summary>
+        /// <param name="info">Texture creation info whose format selects the adjustment</param>
+        /// <param name="pixelFormat">Pixel format that will be used for the texture</param>
+        /// <returns>The swizzle to apply</returns>
+        private MTLTextureSwizzleChannels GetSwizzle(TextureCreateInfo info, MTLPixelFormat pixelFormat)
+        {
+            var r = Info.SwizzleR.Convert();
+            var g = Info.SwizzleG.Convert();
+            var b = Info.SwizzleB.Convert();
+            var a = Info.SwizzleA.Convert();
+
+            bool swapsRedAndBlue = info.Format == Format.R5G5B5A1Unorm ||
+                                   info.Format == Format.R5G5B5X1Unorm ||
+                                   info.Format == Format.R5G6B5Unorm;
+
+            if (swapsRedAndBlue)
+            {
+                (b, r) = (r, b);
+            }
+            else if (pixelFormat == MTLPixelFormat.ABGR4Unorm || info.Format == Format.A1B5G5R5Unorm)
+            {
+                // Reverse the channel order: R<-A, G<-B, B<-G, A<-R.
+                (r, g, b, a) = (a, b, g, r);
+            }
+
+            return new MTLTextureSwizzleChannels
+            {
+                red = r,
+                green = g,
+                blue = b,
+                alpha = a
+            };
+        }
+
+        /// <summary>
+        /// Copies this texture into the destination, starting at the given destination layer and level.
+        /// Only copies between textures with matching multisampling, bytes per pixel and depth/stencil-ness are performed;
+        /// every other combination is currently a no-op.
+        /// </summary>
+        /// <param name="destination">Destination texture</param>
+        /// <param name="firstLayer">First destination layer</param>
+        /// <param name="firstLevel">First destination mipmap level</param>
+        public void CopyTo(ITexture destination, int firstLayer, int firstLevel)
+        {
+            CommandBufferScoped cbs = Pipeline.Cbs;
+
+            TextureBase src = this;
+            TextureBase dst = (TextureBase)destination;
+
+            if (!(Valid && dst.Valid))
+            {
+                return;
+            }
+
+            var srcImage = GetHandle();
+            var dstImage = dst.GetHandle();
+
+            bool srcIsMultisample = Info.Target.IsMultisample();
+            bool dstIsMultisample = dst.Info.Target.IsMultisample();
+
+            if (srcIsMultisample && !dstIsMultisample)
+            {
+                // Not implemented yet:
+                // int layers = Math.Min(Info.GetLayers(), dst.Info.GetLayers() - firstLayer);
+
+                // _gd.HelperShader.CopyMSToNonMS(_gd, cbs, src, dst, 0, firstLayer, layers);
+                return;
+            }
+
+            if (dstIsMultisample && !srcIsMultisample)
+            {
+                // Not implemented yet:
+                // int layers = Math.Min(Info.GetLayers(), dst.Info.GetLayers() - firstLayer);
+
+                // _gd.HelperShader.CopyNonMSToMS(_gd, cbs, src, dst, 0, firstLayer, layers);
+                return;
+            }
+
+            if (dst.Info.BytesPerPixel != Info.BytesPerPixel)
+            {
+                // Not implemented yet:
+                // int layers = Math.Min(Info.GetLayers(), dst.Info.GetLayers() - firstLayer);
+                // int levels = Math.Min(Info.Levels, dst.Info.Levels - firstLevel);
+
+                // _gd.HelperShader.CopyIncompatibleFormats(_gd, cbs, src, dst, 0, firstLayer, 0, firstLevel, layers, levels);
+                return;
+            }
+
+            if (src.Info.Format.IsDepthOrStencil() != dst.Info.Format.IsDepthOrStencil())
+            {
+                // Not implemented yet:
+                // int layers = Math.Min(Info.GetLayers(), dst.Info.GetLayers() - firstLayer);
+                // int levels = Math.Min(Info.Levels, dst.Info.Levels - firstLevel);
+
+                // TODO: depth copy?
+                // _gd.HelperShader.CopyColor(_gd, cbs, src, dst, 0, firstLayer, 0, FirstLevel, layers, levels);
+                return;
+            }
+
+            TextureCopy.Copy(cbs, srcImage, dstImage, src.Info, dst.Info, 0, firstLayer, 0, firstLevel);
+        }
+
+        /// <summary>
+        /// Copies a single layer and level of this texture into a single layer and level of the destination.
+        /// Only copies between textures with matching multisampling, bytes per pixel and depth/stencil-ness are performed;
+        /// every other combination is currently a no-op.
+        /// </summary>
+        /// <param name="destination">Destination texture</param>
+        /// <param name="srcLayer">Source layer</param>
+        /// <param name="dstLayer">Destination layer</param>
+        /// <param name="srcLevel">Source mipmap level</param>
+        /// <param name="dstLevel">Destination mipmap level</param>
+        public void CopyTo(ITexture destination, int srcLayer, int dstLayer, int srcLevel, int dstLevel)
+        {
+            CommandBufferScoped cbs = Pipeline.Cbs;
+
+            TextureBase src = this;
+            TextureBase dst = (TextureBase)destination;
+
+            if (!(Valid && dst.Valid))
+            {
+                return;
+            }
+
+            var srcImage = GetHandle();
+            var dstImage = dst.GetHandle();
+
+            bool srcIsMultisample = Info.Target.IsMultisample();
+            bool dstIsMultisample = dst.Info.Target.IsMultisample();
+
+            if (srcIsMultisample && !dstIsMultisample)
+            {
+                // Not implemented yet:
+                // _gd.HelperShader.CopyMSToNonMS(_gd, cbs, src, dst, srcLayer, dstLayer, 1);
+                return;
+            }
+
+            if (dstIsMultisample && !srcIsMultisample)
+            {
+                // Not implemented yet:
+                // _gd.HelperShader.CopyNonMSToMS(_gd, cbs, src, dst, srcLayer, dstLayer, 1);
+                return;
+            }
+
+            if (dst.Info.BytesPerPixel != Info.BytesPerPixel)
+            {
+                // Not implemented yet:
+                // _gd.HelperShader.CopyIncompatibleFormats(_gd, cbs, src, dst, srcLayer, dstLayer, srcLevel, dstLevel, 1, 1);
+                return;
+            }
+
+            if (src.Info.Format.IsDepthOrStencil() != dst.Info.Format.IsDepthOrStencil())
+            {
+                // Not implemented yet:
+                // _gd.HelperShader.CopyColor(_gd, cbs, src, dst, srcLayer, dstLayer, srcLevel, dstLevel, 1, 1);
+                return;
+            }
+
+            TextureCopy.Copy(cbs, srcImage, dstImage, src.Info, dst.Info, srcLayer, dstLayer, srcLevel, dstLevel, 1, 1);
+        }
+
+        /// <summary>
+        /// Performs a scaled copy (blit) of a region of this texture into a region of <paramref name="destination"/>.
+        /// </summary>
+        /// <param name="destination">Texture receiving the blit</param>
+        /// <param name="srcRegion">Region of this texture to read</param>
+        /// <param name="dstRegion">Region of the destination to write</param>
+        /// <param name="linearFilter">Forwarded to the pipeline blit to select linear filtering</param>
+        public void CopyTo(ITexture destination, Extents2D srcRegion, Extents2D dstRegion, bool linearFilter)
+        {
+            if (Renderer.CommandBufferPool.OwnedByCurrentThread)
+            {
+                var target = (Texture)destination;
+
+                // The blit variant is chosen from the destination format.
+                Pipeline.Blit(this, target, srcRegion, dstRegion, target.Info.Format.IsDepthOrStencil(), linearFilter);
+            }
+            else
+            {
+                Logger.Warning?.PrintMsg(LogClass.Gpu, "Metal doesn't currently support scaled blit on background thread.");
+            }
+        }
+
+        /// <summary>
+        /// Copies one slice of this texture, at the given layer and mip level, into a buffer range.
+        /// </summary>
+        /// <remarks>
+        /// When the texture needs D24S8 conversion the data is first blitted into a temporary buffer
+        /// and then converted into the destination range.
+        /// </remarks>
+        /// <param name="range">Destination buffer range; the data is written starting at its offset</param>
+        /// <param name="layer">Layer (or depth slice) to read</param>
+        /// <param name="level">Mip level to read</param>
+        /// <param name="stride">Row stride requested by the caller; forwarded to the copy helper</param>
+        public void CopyTo(BufferRange range, int layer, int level, int stride)
+        {
+            var cbs = Pipeline.Cbs;
+
+            int outSize = Info.GetMipSize(level);
+            int hostSize = GetBufferDataLength(outSize);
+
+            int offset = range.Offset;
+
+            var autoBuffer = Renderer.BufferManager.GetBuffer(range.Handle, true);
+            var mtlBuffer = autoBuffer.Get(cbs, range.Offset, outSize).Value;
+
+            if (PrepareOutputBuffer(cbs, hostSize, mtlBuffer, out MTLBuffer copyToBuffer, out BufferHolder tempCopyHolder))
+            {
+                // The temporary buffer is written from its start.
+                offset = 0;
+            }
+
+            // The copy helper rejects any level whose end offset exceeds the limit it is given, and that end
+            // offset includes the starting offset. The limit must therefore be offset + hostSize, otherwise
+            // any non-zero range offset would make the helper skip the copy entirely.
+            CopyFromOrToBuffer(cbs, copyToBuffer, MtlTexture, offset + hostSize, true, layer, level, 1, 1, singleSlice: true, offset, stride);
+
+            if (tempCopyHolder != null)
+            {
+                CopyDataToOutputBuffer(cbs, tempCopyHolder, autoBuffer, hostSize, range.Offset);
+                tempCopyHolder.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Creates a texture view over this texture's identity-swizzle handle.
+        /// </summary>
+        /// <param name="info">Creation info describing the view</param>
+        /// <param name="firstLayer">First layer exposed by the view</param>
+        /// <param name="firstLevel">First mip level exposed by the view</param>
+        /// <returns>The new view</returns>
+        public ITexture CreateView(TextureCreateInfo info, int firstLayer, int firstLevel) =>
+            new Texture(Device, Renderer, Pipeline, info, _identitySwizzleHandle, firstLayer, firstLevel);
+
+        /// <summary>
+        /// Writes guest texture data into upload storage, converting D24S8 to D32FS8 when the texture requires it.
+        /// </summary>
+        /// <param name="storage">Destination storage</param>
+        /// <param name="input">Source texture data</param>
+        private void CopyDataToBuffer(Span<byte> storage, ReadOnlySpan<byte> input)
+        {
+            if (!NeedsD24S8Conversion())
+            {
+                input.CopyTo(storage);
+            }
+            else
+            {
+                FormatConverter.ConvertD24S8ToD32FS8(storage, input);
+            }
+        }
+
+        /// <summary>
+        /// Returns texture data read back from the host, converting D32FS8 to D24S8 when the texture requires it.
+        /// </summary>
+        /// <param name="storage">Data as read back from the host</param>
+        /// <param name="size">Size used to allocate the output when <paramref name="output"/> is empty</param>
+        /// <param name="output">Span receiving converted data; a new array is allocated if it is empty</param>
+        /// <returns><paramref name="storage"/> unchanged when no conversion is needed, otherwise the converted output</returns>
+        private ReadOnlySpan<byte> GetDataFromBuffer(ReadOnlySpan<byte> storage, int size, Span<byte> output)
+        {
+            if (!NeedsD24S8Conversion())
+            {
+                return storage;
+            }
+
+            Span<byte> converted = output;
+
+            if (converted.IsEmpty)
+            {
+                converted = new byte[GetBufferDataLength(size)];
+            }
+
+            FormatConverter.ConvertD32FS8ToD24S8(converted, storage);
+
+            return converted;
+        }
+
+        /// <summary>
+        /// Selects the buffer a texture readback should be blitted into.
+        /// </summary>
+        /// <param name="cbs">Command buffer the copy will be recorded on</param>
+        /// <param name="hostSize">Size of the temporary buffer to create when conversion is needed</param>
+        /// <param name="target">Final destination buffer</param>
+        /// <param name="copyTarget">Buffer to blit into: a temporary buffer when conversion is needed, otherwise <paramref name="target"/></param>
+        /// <param name="copyTargetHolder">Holder of the temporary buffer, or null when none was created</param>
+        /// <returns>True if a temporary buffer was created</returns>
+        private bool PrepareOutputBuffer(CommandBufferScoped cbs, int hostSize, MTLBuffer target, out MTLBuffer copyTarget, out BufferHolder copyTargetHolder)
+        {
+            bool needsTemporary = NeedsD24S8Conversion();
+
+            if (needsTemporary)
+            {
+                copyTargetHolder = Renderer.BufferManager.Create(hostSize);
+                copyTarget = copyTargetHolder.GetBuffer().Get(cbs, 0, hostSize).Value;
+            }
+            else
+            {
+                copyTarget = target;
+                copyTargetHolder = null;
+            }
+
+            return needsTemporary;
+        }
+
+        /// <summary>
+        /// Converts data held in a temporary buffer into the final output buffer.
+        /// Does nothing when the texture does not need D24S8 conversion.
+        /// </summary>
+        /// <param name="cbs">Command buffer to record on</param>
+        /// <param name="hostData">Temporary buffer holding the blitted data</param>
+        /// <param name="copyTarget">Final destination buffer</param>
+        /// <param name="hostSize">Size in bytes of the data in <paramref name="hostData"/></param>
+        /// <param name="dstOffset">Offset into <paramref name="copyTarget"/> to write at</param>
+        private void CopyDataToOutputBuffer(CommandBufferScoped cbs, BufferHolder hostData, Auto<DisposableBuffer> copyTarget, int hostSize, int dstOffset)
+        {
+            if (!NeedsD24S8Conversion())
+            {
+                return;
+            }
+
+            // The helper is given hostSize divided by the size of two ints.
+            int elementCount = hostSize / (2 * sizeof(int));
+
+            Renderer.HelperShader.ConvertD32S8ToD24S8(cbs, hostData, copyTarget, elementCount, dstOffset);
+        }
+
+        /// <summary>
+        /// Indicates whether the texture format is D24S8 while the Metal texture uses Depth32FloatStencil8,
+        /// in which case data must be converted on upload and readback.
+        /// </summary>
+        private bool NeedsD24S8Conversion() =>
+            FormatTable.IsD24S8(Info.Format) && MtlFormat == MTLPixelFormat.Depth32FloatStencil8;
+
+        /// <summary>
+        /// Records blit commands that copy a range of layers and mip levels between a buffer and a texture.
+        /// </summary>
+        /// <remarks>
+        /// Slices are laid out in the buffer level by level and, within a level, layer by layer.
+        /// Copying stops at the first level that would end past <paramref name="size"/>.
+        /// </remarks>
+        /// <param name="cbs">Command buffer to record on</param>
+        /// <param name="buffer">Buffer to read from or write to</param>
+        /// <param name="image">Texture to read from or write to</param>
+        /// <param name="size">Exclusive upper bound for the buffer offsets the copy may touch</param>
+        /// <param name="to">True to copy texture to buffer, false to copy buffer to texture</param>
+        /// <param name="dstLayer">First layer, or first depth slice for 3D textures</param>
+        /// <param name="dstLevel">First mip level</param>
+        /// <param name="dstLayers">Number of layers (or depth slices) to copy</param>
+        /// <param name="dstLevels">Number of mip levels to copy</param>
+        /// <param name="singleSlice">True if only individual slices of a 3D texture are copied</param>
+        /// <param name="offset">Buffer offset of the first slice</param>
+        /// <param name="stride">Currently unused; rows always use the mip level stride</param>
+        public void CopyFromOrToBuffer(
+            CommandBufferScoped cbs,
+            MTLBuffer buffer,
+            MTLTexture image,
+            int size,
+            bool to,
+            int dstLayer,
+            int dstLevel,
+            int dstLayers,
+            int dstLevels,
+            bool singleSlice,
+            int offset = 0,
+            int stride = 0)
+        {
+            MTLBlitCommandEncoder blitCommandEncoder = cbs.Encoders.EnsureBlitEncoder();
+
+            bool is3D = Info.Target == Target.Texture3D;
+            int width = Math.Max(1, Info.Width >> dstLevel);
+            int height = Math.Max(1, Info.Height >> dstLevel);
+
+            for (int oLevel = 0; oLevel < dstLevels; oLevel++)
+            {
+                int level = oLevel + dstLevel;
+                int mipSize = Info.GetMipSize2D(level);
+
+                int mipSizeLevel = GetBufferDataLength(is3D && !singleSlice
+                    ? Info.GetMipSize(level)
+                    : mipSize * dstLayers);
+
+                int endOffset = offset + mipSizeLevel;
+
+                // The unsigned comparison also rejects negative (overflowed) offsets.
+                if ((uint)endOffset > (uint)size)
+                {
+                    break;
+                }
+
+                for (int oLayer = 0; oLayer < dstLayers; oLayer++)
+                {
+                    // 3D textures select the slice through the origin's z, everything else through the array slice.
+                    int layer = !is3D ? dstLayer + oLayer : 0;
+                    int z = is3D ? dstLayer + oLayer : 0;
+
+                    if (to)
+                    {
+                        blitCommandEncoder.CopyFromTexture(
+                            image,
+                            (ulong)layer,
+                            (ulong)level,
+                            new MTLOrigin { z = (ulong)z },
+                            new MTLSize { width = (ulong)width, height = (ulong)height, depth = 1 },
+                            buffer,
+                            (ulong)offset,
+                            (ulong)Info.GetMipStride(level),
+                            (ulong)mipSize
+                        );
+                    }
+                    else
+                    {
+                        // "layer" already includes oLayer; adding it again would skip every other layer
+                        // and would pass a non-zero slice for 3D textures.
+                        blitCommandEncoder.CopyFromBuffer(
+                            buffer,
+                            (ulong)offset,
+                            (ulong)Info.GetMipStride(level),
+                            (ulong)mipSize,
+                            new MTLSize { width = (ulong)width, height = (ulong)height, depth = 1 },
+                            image,
+                            (ulong)layer,
+                            (ulong)level,
+                            new MTLOrigin { z = (ulong)z }
+                        );
+                    }
+
+                    offset += mipSize;
+                }
+
+                width = Math.Max(1, width >> 1);
+                height = Math.Max(1, height >> 1);
+            }
+        }
+
+        /// <summary>
+        /// Reads back the data of every mip level of the texture through the given flush buffer.
+        /// </summary>
+        /// <param name="cbp">Command buffer pool used for the readback</param>
+        /// <param name="flushBuffer">Flush buffer that performs the readback</param>
+        /// <returns>The texture data, converted from D32FS8 to D24S8 if the texture requires it</returns>
+        private ReadOnlySpan<byte> GetData(CommandBufferPool cbp, PersistentFlushBuffer flushBuffer)
+        {
+            int totalSize = 0;
+            int mip = 0;
+
+            while (mip < Info.Levels)
+            {
+                totalSize += Info.GetMipSize(mip);
+                mip++;
+            }
+
+            int hostSize = GetBufferDataLength(totalSize);
+
+            Span<byte> flushed = flushBuffer.GetTextureData(cbp, this, hostSize);
+
+            // The readback span doubles as the conversion output.
+            return GetDataFromBuffer(flushed, hostSize, flushed);
+        }
+
+        /// <summary>
+        /// Reads back the data of a single layer and mip level through the given flush buffer.
+        /// </summary>
+        /// <param name="cbp">Command buffer pool used for the readback</param>
+        /// <param name="flushBuffer">Flush buffer that performs the readback</param>
+        /// <param name="layer">Layer to read</param>
+        /// <param name="level">Mip level to read</param>
+        /// <returns>The slice data, converted from D32FS8 to D24S8 if the texture requires it</returns>
+        private ReadOnlySpan<byte> GetData(CommandBufferPool cbp, PersistentFlushBuffer flushBuffer, int layer, int level)
+        {
+            int hostSize = GetBufferDataLength(Info.GetMipSize(level));
+
+            Span<byte> flushed = flushBuffer.GetTextureData(cbp, this, hostSize, layer, level);
+
+            // The readback span doubles as the conversion output.
+            return GetDataFromBuffer(flushed, hostSize, flushed);
+        }
+
+        /// <summary>
+        /// Reads back the data of the whole texture.
+        /// </summary>
+        /// <remarks>
+        /// On the thread that owns the renderer's command buffer pool, pending commands are flushed and that
+        /// pool is used. On any other thread the background resources' pool is used instead.
+        /// </remarks>
+        /// <returns>The texture data</returns>
+        public PinnedSpan<byte> GetData()
+        {
+            BackgroundResource resources = Renderer.BackgroundResources.Get();
+
+            CommandBufferPool pool;
+
+            if (Renderer.CommandBufferPool.OwnedByCurrentThread)
+            {
+                Renderer.FlushAllCommands();
+
+                pool = Renderer.CommandBufferPool;
+            }
+            else
+            {
+                pool = resources.GetPool();
+            }
+
+            return PinnedSpan<byte>.UnsafeFromSpan(GetData(pool, resources.GetFlushBuffer()));
+        }
+
+        /// <summary>
+        /// Reads back the data of a single layer and mip level of the texture.
+        /// </summary>
+        /// <remarks>
+        /// On the thread that owns the renderer's command buffer pool, pending commands are flushed and that
+        /// pool is used. On any other thread the background resources' pool is used instead.
+        /// </remarks>
+        /// <param name="layer">Layer to read</param>
+        /// <param name="level">Mip level to read</param>
+        /// <returns>The slice data</returns>
+        public PinnedSpan<byte> GetData(int layer, int level)
+        {
+            BackgroundResource resources = Renderer.BackgroundResources.Get();
+
+            CommandBufferPool pool;
+
+            if (Renderer.CommandBufferPool.OwnedByCurrentThread)
+            {
+                Renderer.FlushAllCommands();
+
+                pool = Renderer.CommandBufferPool;
+            }
+            else
+            {
+                pool = resources.GetPool();
+            }
+
+            return PinnedSpan<byte>.UnsafeFromSpan(GetData(pool, resources.GetFlushBuffer(), layer, level));
+        }
+
+        /// <summary>
+        /// Uploads data for every mip level and layer of the texture.
+        /// </summary>
+        /// <remarks>
+        /// The data is read level by level and, within a level, layer by layer (or as consecutive depth
+        /// slices for 3D textures). Uploading stops at the first level that is not fully contained in the data.
+        /// </remarks>
+        /// <param name="data">Texture data; ownership is taken and the memory is disposed before returning</param>
+        public void SetData(MemoryOwner<byte> data)
+        {
+            // The per-slice overload disposes the data it is given, so do the same here on every exit path.
+            using MemoryOwner<byte> ownedData = data;
+
+            var blitCommandEncoder = Pipeline.GetOrCreateBlitEncoder();
+
+            var dataSpan = ownedData.Memory.Span;
+
+            // Staging buffer; the using declaration also releases it when the loop exits early.
+            using var buffer = Renderer.BufferManager.Create(dataSpan.Length);
+            buffer.SetDataUnchecked(0, dataSpan);
+            var mtlBuffer = buffer.GetBuffer(false).Get(Pipeline.Cbs).Value;
+
+            int width = Info.Width;
+            int height = Info.Height;
+            int depth = Info.Depth;
+            int levels = Info.Levels;
+            int layers = Info.GetLayers();
+            bool is3D = Info.Target == Target.Texture3D;
+
+            int offset = 0;
+
+            for (int level = 0; level < levels; level++)
+            {
+                int mipSize = Info.GetMipSize2D(level);
+
+                // For 3D textures a single blit uploads every depth slice of the level,
+                // so it consumes "depth" 2D images from the buffer rather than one.
+                int slicesPerCopy = is3D ? depth : 1;
+                int copySize = mipSize * slicesPerCopy;
+
+                int endOffset = offset + copySize * layers;
+
+                // Stop before reading past the end of the staging buffer.
+                if ((uint)endOffset > (uint)dataSpan.Length)
+                {
+                    break;
+                }
+
+                for (int layer = 0; layer < layers; layer++)
+                {
+                    blitCommandEncoder.CopyFromBuffer(
+                        mtlBuffer,
+                        (ulong)offset,
+                        (ulong)Info.GetMipStride(level),
+                        (ulong)mipSize,
+                        new MTLSize { width = (ulong)width, height = (ulong)height, depth = is3D ? (ulong)depth : 1 },
+                        MtlTexture,
+                        (ulong)layer,
+                        (ulong)level,
+                        new MTLOrigin()
+                    );
+
+                    offset += copySize;
+                }
+
+                width = Math.Max(1, width >> 1);
+                height = Math.Max(1, height >> 1);
+
+                if (is3D)
+                {
+                    depth = Math.Max(1, depth >> 1);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Uploads data to a range of layers and mip levels through a temporary staging buffer.
+        /// </summary>
+        /// <param name="data">Source texture data</param>
+        /// <param name="layer">First layer (or depth slice) to write</param>
+        /// <param name="level">First mip level to write</param>
+        /// <param name="layers">Number of layers to write</param>
+        /// <param name="levels">Number of mip levels to write</param>
+        /// <param name="singleSlice">True if only individual slices of a 3D texture are written</param>
+        private void SetData(ReadOnlySpan<byte> data, int layer, int level, int layers, int levels, bool singleSlice)
+        {
+            // Upload size after any D24S8 expansion.
+            int stagedLength = GetBufferDataLength(data.Length);
+
+            using var staging = Renderer.BufferManager.Create(stagedLength);
+
+            // TODO: loadInline logic
+
+            var cbs = Pipeline.Cbs;
+
+            Span<byte> stagingStorage = staging.GetDataStorage(0, stagedLength);
+            CopyDataToBuffer(stagingStorage, data);
+
+            CopyFromOrToBuffer(
+                cbs,
+                staging.GetBuffer().Get(cbs).Value,
+                GetHandle(),
+                stagedLength,
+                false,
+                layer,
+                level,
+                layers,
+                levels,
+                singleSlice);
+        }
+
+        /// <summary>
+        /// Uploads data for a single layer and mip level of the texture.
+        /// </summary>
+        /// <param name="data">Slice data; ownership is taken and the memory is disposed before returning</param>
+        /// <param name="layer">Layer (or depth slice) to write</param>
+        /// <param name="level">Mip level to write</param>
+        public void SetData(MemoryOwner<byte> data, int layer, int level)
+        {
+            // Dispose the owned memory even if the upload throws.
+            using (data)
+            {
+                SetData(data.Memory.Span, layer, level, 1, 1, singleSlice: true);
+            }
+        }
+
+        /// <summary>
+        /// Uploads data for a rectangular region of a single layer and mip level of the texture.
+        /// </summary>
+        /// <param name="data">Region data; ownership is taken and the memory is disposed before returning</param>
+        /// <param name="layer">Layer to write</param>
+        /// <param name="level">Mip level to write</param>
+        /// <param name="region">Region of the level to overwrite</param>
+        public void SetData(MemoryOwner<byte> data, int layer, int level, Rectangle<int> region)
+        {
+            // The per-slice overload disposes the data it is given, so do the same here on every exit path.
+            using MemoryOwner<byte> ownedData = data;
+
+            var blitCommandEncoder = Pipeline.GetOrCreateBlitEncoder();
+
+            ulong bytesPerRow = (ulong)Info.GetMipStride(level);
+            ulong bytesPerImage = 0;
+            if (MtlTexture.TextureType == MTLTextureType.Type3D)
+            {
+                bytesPerImage = bytesPerRow * (ulong)Info.Height;
+            }
+
+            var dataSpan = ownedData.Memory.Span;
+
+            // Staging buffer; released on every exit path.
+            using var buffer = Renderer.BufferManager.Create(dataSpan.Length);
+            buffer.SetDataUnchecked(0, dataSpan);
+            var mtlBuffer = buffer.GetBuffer(false).Get(Pipeline.Cbs).Value;
+
+            blitCommandEncoder.CopyFromBuffer(
+                mtlBuffer,
+                0,
+                bytesPerRow,
+                bytesPerImage,
+                new MTLSize { width = (ulong)region.Width, height = (ulong)region.Height, depth = 1 },
+                MtlTexture,
+                (ulong)layer,
+                (ulong)level,
+                new MTLOrigin { x = (ulong)region.X, y = (ulong)region.Y }
+            );
+        }
+
+        /// <summary>
+        /// Gets the size a block of texture data occupies in a host buffer:
+        /// twice the given length when the texture needs D24S8 conversion, otherwise the length itself.
+        /// </summary>
+        /// <param name="length">Size of the data in the texture's own format</param>
+        /// <returns>Size of the data as stored in the host buffer</returns>
+        private int GetBufferDataLength(int length) => NeedsD24S8Conversion() ? length * 2 : length;
+
+        /// <summary>
+        /// Not implemented for this texture type.
+        /// </summary>
+        /// <param name="buffer">Buffer range that would back the texture</param>
+        /// <exception cref="NotImplementedException">Always thrown</exception>
+        public void SetStorage(BufferRange buffer) => throw new NotImplementedException();
+
+        /// <summary>
+        /// Releases the texture, disposing the identity-swizzle handle first when it is flagged as different.
+        /// </summary>
+        public override void Release()
+        {
+            // NOTE(review): presumably the identity-swizzle handle is the same object as the main handle
+            // when _identityIsDifferent is false, which would explain the conditional dispose; confirm.
+            if (_identityIsDifferent)
+            {
+                _identitySwizzleHandle.Dispose();
+            }
+
+            base.Release();
+        }
+    }
+}

+ 93 - 0
src/Ryujinx.Graphics.Metal/TextureArray.cs

@@ -0,0 +1,93 @@
+using Ryujinx.Graphics.GAL;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Array of textures and samplers, or of buffer textures, bound to the pipeline as a unit.
+    /// Only one of the two backing arrays is allocated, depending on the kind chosen at construction.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    internal class TextureArray : ITextureArray
+    {
+        private readonly TextureRef[] _textureRefs;
+        private readonly TextureBuffer[] _bufferTextureRefs;
+
+        private readonly bool _isBuffer;
+        private readonly Pipeline _pipeline;
+
+        /// <summary>
+        /// Creates a new texture array.
+        /// </summary>
+        /// <param name="size">Number of elements</param>
+        /// <param name="isBuffer">True to hold buffer textures, false to hold regular textures and samplers</param>
+        /// <param name="pipeline">Pipeline notified whenever the array contents change</param>
+        public TextureArray(int size, bool isBuffer, Pipeline pipeline)
+        {
+            _isBuffer = isBuffer;
+            _pipeline = pipeline;
+
+            if (_isBuffer)
+            {
+                _bufferTextureRefs = new TextureBuffer[size];
+            }
+            else
+            {
+                _textureRefs = new TextureRef[size];
+            }
+        }
+
+        /// <summary>
+        /// Replaces a run of samplers starting at <paramref name="index"/> and marks the pipeline textures dirty.
+        /// Entries that are not <see cref="SamplerHolder"/> instances reset the slot's sampler to its default.
+        /// </summary>
+        /// <param name="index">First slot to update</param>
+        /// <param name="samplers">Samplers to store</param>
+        public void SetSamplers(int index, ISampler[] samplers)
+        {
+            for (int i = 0; i < samplers.Length; i++)
+            {
+                int slot = index + i;
+
+                if (samplers[i] is not SamplerHolder holder)
+                {
+                    _textureRefs[slot].Sampler = default;
+
+                    continue;
+                }
+
+                _textureRefs[slot].Sampler = holder.GetSampler();
+            }
+
+            SetDirty();
+        }
+
+        /// <summary>
+        /// Replaces a run of textures starting at <paramref name="index"/> and marks the pipeline textures dirty.
+        /// Entries that are neither buffer textures nor regular textures clear the slot in the array in use.
+        /// </summary>
+        /// <param name="index">First slot to update</param>
+        /// <param name="textures">Textures to store</param>
+        public void SetTextures(int index, ITexture[] textures)
+        {
+            for (int i = 0; i < textures.Length; i++)
+            {
+                int slot = index + i;
+
+                switch (textures[i])
+                {
+                    case TextureBuffer bufferTexture:
+                        _bufferTextureRefs[slot] = bufferTexture;
+                        break;
+                    case Texture regularTexture:
+                        _textureRefs[slot].Storage = regularTexture;
+                        break;
+                    default:
+                        if (_isBuffer)
+                        {
+                            _bufferTextureRefs[slot] = null;
+                        }
+                        else
+                        {
+                            _textureRefs[slot].Storage = null;
+                        }
+
+                        break;
+                }
+            }
+
+            SetDirty();
+        }
+
+        /// <summary>
+        /// Gets the texture and sampler slots; the field is never assigned for buffer arrays.
+        /// </summary>
+        public TextureRef[] GetTextureRefs() => _textureRefs;
+
+        /// <summary>
+        /// Gets the buffer texture slots; the field is never assigned for regular texture arrays.
+        /// </summary>
+        public TextureBuffer[] GetBufferTextureRefs() => _bufferTextureRefs;
+
+        /// <summary>
+        /// Tells the pipeline that texture bindings changed.
+        /// </summary>
+        private void SetDirty() => _pipeline.DirtyTextures();
+
+        /// <summary>
+        /// Nothing to release.
+        /// </summary>
+        public void Dispose() { }
+    }
+}

+ 67 - 0
src/Ryujinx.Graphics.Metal/TextureBase.cs

@@ -0,0 +1,67 @@
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Common state shared by Metal texture types: the native texture handle, its creation info
+    /// and a validity flag that is cleared exactly once on disposal.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    abstract class TextureBase : IDisposable
+    {
+        // 1 while the texture is usable, 0 once it has been disposed.
+        private int _isValid = 1;
+
+        /// <summary>
+        /// True until the texture has been disposed.
+        /// </summary>
+        public bool Valid => Volatile.Read(ref _isValid) != 0;
+
+        protected readonly Pipeline Pipeline;
+        protected readonly MTLDevice Device;
+        protected readonly MetalRenderer Renderer;
+
+        protected MTLTexture MtlTexture;
+
+        public readonly TextureCreateInfo Info;
+        public int Width => Info.Width;
+        public int Height => Info.Height;
+        public int Depth => Info.Depth;
+
+        public MTLPixelFormat MtlFormat { get; protected set; }
+        public int FirstLayer { get; protected set; }
+        public int FirstLevel { get; protected set; }
+
+        /// <summary>
+        /// Stores the objects and creation info shared by all texture types.
+        /// </summary>
+        /// <param name="device">Metal device</param>
+        /// <param name="renderer">Owning renderer</param>
+        /// <param name="pipeline">Pipeline used to record commands</param>
+        /// <param name="info">Texture creation info</param>
+        public TextureBase(MTLDevice device, MetalRenderer renderer, Pipeline pipeline, TextureCreateInfo info)
+        {
+            Info = info;
+            Pipeline = pipeline;
+            Renderer = renderer;
+            Device = device;
+        }
+
+        /// <summary>
+        /// Gets the native texture handle, or a null handle once the texture has been disposed.
+        /// </summary>
+        public MTLTexture GetHandle()
+        {
+            return _isValid != 0 ? MtlTexture : new MTLTexture(IntPtr.Zero);
+        }
+
+        /// <summary>
+        /// Releases the texture; the base implementation simply disposes it.
+        /// </summary>
+        public virtual void Release() => Dispose();
+
+        /// <summary>
+        /// Invalidates the texture and disposes the native handle. Only the first call has any effect.
+        /// </summary>
+        public void Dispose()
+        {
+            // The exchange ensures only one caller observes the valid state and performs the dispose.
+            if (Interlocked.Exchange(ref _isValid, 0) == 0)
+            {
+                return;
+            }
+
+            if (MtlTexture != IntPtr.Zero)
+            {
+                MtlTexture.Dispose();
+            }
+        }
+    }
+}

+ 132 - 0
src/Ryujinx.Graphics.Metal/TextureBuffer.cs

@@ -0,0 +1,132 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Texture whose storage is a range of a buffer. Copies, views and per-slice uploads are not supported;
+    /// data access is forwarded to the renderer's buffer functions.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    class TextureBuffer : TextureBase, ITexture
+    {
+        private MTLTextureDescriptor _descriptor;
+        private BufferHandle _bufferHandle;
+        private int _offset;
+        private int _size;
+
+        // Buffer manager's buffer count captured at the last SetStorage call, used to detect changes.
+        private int _bufferCount;
+        private Auto<DisposableBuffer> _buffer;
+
+        /// <summary>
+        /// Creates a buffer texture and prepares the descriptor used to create the native texture later.
+        /// </summary>
+        /// <param name="device">Metal device</param>
+        /// <param name="renderer">Owning renderer</param>
+        /// <param name="pipeline">Pipeline used to record commands</param>
+        /// <param name="info">Texture creation info</param>
+        public TextureBuffer(MTLDevice device, MetalRenderer renderer, Pipeline pipeline, TextureCreateInfo info) : base(device, renderer, pipeline, info)
+        {
+            MtlFormat = FormatTable.GetFormat(Info.Format);
+
+            _descriptor = new MTLTextureDescriptor
+            {
+                PixelFormat = MtlFormat,
+                Usage = MTLTextureUsage.Unknown,
+                TextureType = MTLTextureType.TextureBuffer,
+                Width = (ulong)Info.Width,
+                Height = (ulong)Info.Height,
+            };
+        }
+
+        /// <summary>
+        /// Disposes the current native texture and recreates it over the current buffer range,
+        /// or leaves it at its default value when no buffer is set.
+        /// </summary>
+        /// <param name="write">Forwarded to the buffer lookup to indicate write access</param>
+        public void RebuildStorage(bool write)
+        {
+            if (MtlTexture != IntPtr.Zero)
+            {
+                MtlTexture.Dispose();
+            }
+
+            if (_buffer != null)
+            {
+                DisposableBuffer backing = _buffer.Get(Pipeline.Cbs, _offset, _size, write);
+
+                // The texture width is the number of texels that fit in the range.
+                _descriptor.Width = (uint)(_size / Info.BytesPerPixel);
+                MtlTexture = backing.Value.NewTexture(_descriptor, (ulong)_offset, (ulong)_size);
+            }
+            else
+            {
+                MtlTexture = default;
+            }
+        }
+
+        /// <summary>Not supported for buffer textures.</summary>
+        public void CopyTo(ITexture destination, int firstLayer, int firstLevel) => throw new NotSupportedException();
+
+        /// <summary>Not supported for buffer textures.</summary>
+        public void CopyTo(ITexture destination, int srcLayer, int dstLayer, int srcLevel, int dstLevel) => throw new NotSupportedException();
+
+        /// <summary>Not supported for buffer textures.</summary>
+        public void CopyTo(ITexture destination, Extents2D srcRegion, Extents2D dstRegion, bool linearFilter) => throw new NotSupportedException();
+
+        /// <summary>Not supported for buffer textures.</summary>
+        public ITexture CreateView(TextureCreateInfo info, int firstLayer, int firstLevel) => throw new NotSupportedException();
+
+        /// <summary>
+        /// Reads the backing buffer range through the renderer.
+        /// </summary>
+        public PinnedSpan<byte> GetData() => Renderer.GetBufferData(_bufferHandle, _offset, _size);
+
+        /// <summary>
+        /// Reads the backing buffer range; the layer and level are ignored.
+        /// </summary>
+        public PinnedSpan<byte> GetData(int layer, int level) => GetData();
+
+        /// <summary>Not implemented for buffer textures.</summary>
+        public void CopyTo(BufferRange range, int layer, int level, int stride) => throw new NotImplementedException();
+
+        /// <summary>
+        /// Writes the data to the backing buffer at the texture's offset, then disposes the data.
+        /// </summary>
+        /// <param name="data">Data to write; disposed after the write</param>
+        public void SetData(MemoryOwner<byte> data)
+        {
+            Renderer.SetBufferData(_bufferHandle, _offset, data.Memory.Span);
+            data.Dispose();
+        }
+
+        /// <summary>Not supported for buffer textures.</summary>
+        public void SetData(MemoryOwner<byte> data, int layer, int level) => throw new NotSupportedException();
+
+        /// <summary>Not supported for buffer textures.</summary>
+        public void SetData(MemoryOwner<byte> data, int layer, int level, Rectangle<int> region) => throw new NotSupportedException();
+
+        /// <summary>
+        /// Points the texture at a buffer range. Nothing is done when the range and the buffer manager's
+        /// buffer count are both unchanged since the previous call.
+        /// </summary>
+        /// <param name="buffer">Buffer range backing the texture</param>
+        public void SetStorage(BufferRange buffer)
+        {
+            bool unchanged =
+                _bufferHandle == buffer.Handle &&
+                _offset == buffer.Offset &&
+                _size == buffer.Size &&
+                _bufferCount == Renderer.BufferManager.BufferCount;
+
+            if (unchanged)
+            {
+                return;
+            }
+
+            _bufferHandle = buffer.Handle;
+            _offset = buffer.Offset;
+            _size = buffer.Size;
+            _bufferCount = Renderer.BufferManager.BufferCount;
+
+            _buffer = Renderer.BufferManager.GetBuffer(_bufferHandle, false);
+        }
+
+        /// <summary>
+        /// Disposes the texture descriptor, then releases the texture.
+        /// </summary>
+        public override void Release()
+        {
+            _descriptor.Dispose();
+
+            base.Release();
+        }
+    }
+}

+ 265 - 0
src/Ryujinx.Graphics.Metal/TextureCopy.cs

@@ -0,0 +1,265 @@
+using Ryujinx.Common;
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Helpers that record texture-to-texture and texture-to-buffer copies on a blit encoder.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    static class TextureCopy
+    {
+        /// <summary>
+        /// Records a copy of a single 2D region between a buffer and a texture slice.
+        /// </summary>
+        /// <param name="cbs">Command buffer to record on</param>
+        /// <param name="buffer">Buffer to read from or write to</param>
+        /// <param name="image">Texture to read from or write to</param>
+        /// <param name="info">Creation info of <paramref name="image"/></param>
+        /// <param name="to">True to copy texture to buffer, false to copy buffer to texture</param>
+        /// <param name="dstLayer">Array layer, or depth slice for 3D textures</param>
+        /// <param name="dstLevel">Mip level</param>
+        /// <param name="x">X origin of the region in the texture</param>
+        /// <param name="y">Y origin of the region in the texture</param>
+        /// <param name="width">Width of the region</param>
+        /// <param name="height">Height of the region</param>
+        /// <param name="offset">Offset of the data in the buffer</param>
+        /// <returns>The buffer offset immediately after the copied image</returns>
+        public static ulong CopyFromOrToBuffer(
+            CommandBufferScoped cbs,
+            MTLBuffer buffer,
+            MTLTexture image,
+            TextureCreateInfo info,
+            bool to,
+            int dstLayer,
+            int dstLevel,
+            int x,
+            int y,
+            int width,
+            int height,
+            ulong offset = 0)
+        {
+            MTLBlitCommandEncoder blitCommandEncoder = cbs.Encoders.EnsureBlitEncoder();
+
+            bool is3D = info.Target == Target.Texture3D;
+
+            // Buffer rows are measured in blocks and padded to a multiple of 4 bytes.
+            int blockWidth = BitUtils.DivRoundUp(width, info.BlockWidth);
+            int blockHeight = BitUtils.DivRoundUp(height, info.BlockHeight);
+            ulong bytesPerRow = (ulong)BitUtils.AlignUp(blockWidth * info.BytesPerPixel, 4);
+            ulong bytesPerImage = bytesPerRow * (ulong)blockHeight;
+
+            // 3D textures select the slice through the origin's z, everything else through the array slice.
+            MTLOrigin origin = new MTLOrigin { x = (ulong)x, y = (ulong)y, z = is3D ? (ulong)dstLayer : 0 };
+            MTLSize region = new MTLSize { width = (ulong)width, height = (ulong)height, depth = 1 };
+
+            uint layer = is3D ? 0 : (uint)dstLayer;
+
+            if (to)
+            {
+                blitCommandEncoder.CopyFromTexture(
+                    image,
+                    layer,
+                    (ulong)dstLevel,
+                    origin,
+                    region,
+                    buffer,
+                    offset,
+                    bytesPerRow,
+                    bytesPerImage);
+            }
+            else
+            {
+                blitCommandEncoder.CopyFromBuffer(buffer, offset, bytesPerRow, bytesPerImage, region, image, layer, (ulong)dstLevel, origin);
+            }
+
+            return offset + bytesPerImage;
+        }
+
+        /// <summary>
+        /// Copies as many layers (or depth slices) and mip levels as both textures have in common.
+        /// </summary>
+        /// <param name="cbs">Command buffer to record on</param>
+        /// <param name="srcImage">Source texture</param>
+        /// <param name="dstImage">Destination texture</param>
+        /// <param name="srcInfo">Creation info of the source texture</param>
+        /// <param name="dstInfo">Creation info of the destination texture</param>
+        /// <param name="srcLayer">First source layer or depth slice</param>
+        /// <param name="dstLayer">First destination layer or depth slice</param>
+        /// <param name="srcLevel">First source mip level</param>
+        /// <param name="dstLevel">First destination mip level</param>
+        public static void Copy(
+            CommandBufferScoped cbs,
+            MTLTexture srcImage,
+            MTLTexture dstImage,
+            TextureCreateInfo srcInfo,
+            TextureCreateInfo dstInfo,
+            int srcLayer,
+            int dstLayer,
+            int srcLevel,
+            int dstLevel)
+        {
+            int srcDepth = srcInfo.GetDepthOrLayers();
+            int srcLevels = srcInfo.Levels;
+
+            int dstDepth = dstInfo.GetDepthOrLayers();
+            int dstLevels = dstInfo.Levels;
+
+            // The depth of a 3D texture halves with each mip level.
+            if (dstInfo.Target == Target.Texture3D)
+            {
+                dstDepth = Math.Max(1, dstDepth >> dstLevel);
+            }
+
+            int depth = Math.Min(srcDepth, dstDepth);
+            int levels = Math.Min(srcLevels, dstLevels);
+
+            Copy(
+                cbs,
+                srcImage,
+                dstImage,
+                srcInfo,
+                dstInfo,
+                srcLayer,
+                dstLayer,
+                srcLevel,
+                dstLevel,
+                depth,
+                levels);
+        }
+
+        /// <summary>
+        /// Copies a range of layers (or depth slices) and mip levels from one texture to another.
+        /// </summary>
+        /// <remarks>
+        /// Copies between different formats where either side is compressed go through a temporary buffer.
+        /// Copies from a multisample source to a texture with a different sample count are not supported
+        /// and only log a warning.
+        /// </remarks>
+        /// <param name="cbs">Command buffer to record on</param>
+        /// <param name="srcImage">Source texture</param>
+        /// <param name="dstImage">Destination texture</param>
+        /// <param name="srcInfo">Creation info of the source texture</param>
+        /// <param name="dstInfo">Creation info of the destination texture</param>
+        /// <param name="srcDepthOrLayer">First source depth slice (3D) or layer</param>
+        /// <param name="dstDepthOrLayer">First destination depth slice (3D) or layer</param>
+        /// <param name="srcLevel">First source mip level</param>
+        /// <param name="dstLevel">First destination mip level</param>
+        /// <param name="depthOrLayers">Number of depth slices or layers to copy</param>
+        /// <param name="levels">Number of mip levels to copy</param>
+        public static void Copy(
+            CommandBufferScoped cbs,
+            MTLTexture srcImage,
+            MTLTexture dstImage,
+            TextureCreateInfo srcInfo,
+            TextureCreateInfo dstInfo,
+            int srcDepthOrLayer,
+            int dstDepthOrLayer,
+            int srcLevel,
+            int dstLevel,
+            int depthOrLayers,
+            int levels)
+        {
+            MTLBlitCommandEncoder blitCommandEncoder = cbs.Encoders.EnsureBlitEncoder();
+
+            int srcZ;
+            int srcLayer;
+            int srcDepth;
+            int srcLayers;
+
+            if (srcInfo.Target == Target.Texture3D)
+            {
+                srcZ = srcDepthOrLayer;
+                srcLayer = 0;
+                srcDepth = depthOrLayers;
+                srcLayers = 1;
+            }
+            else
+            {
+                srcZ = 0;
+                srcLayer = srcDepthOrLayer;
+                srcDepth = 1;
+                srcLayers = depthOrLayers;
+            }
+
+            int dstZ;
+            int dstLayer;
+            int dstLayers;
+
+            if (dstInfo.Target == Target.Texture3D)
+            {
+                dstZ = dstDepthOrLayer;
+                dstLayer = 0;
+                dstLayers = 1;
+            }
+            else
+            {
+                dstZ = 0;
+                dstLayer = dstDepthOrLayer;
+                dstLayers = depthOrLayers;
+            }
+
+            int srcWidth = srcInfo.Width;
+            int srcHeight = srcInfo.Height;
+
+            int dstWidth = dstInfo.Width;
+            int dstHeight = dstInfo.Height;
+
+            srcWidth = Math.Max(1, srcWidth >> srcLevel);
+            srcHeight = Math.Max(1, srcHeight >> srcLevel);
+
+            dstWidth = Math.Max(1, dstWidth >> dstLevel);
+            dstHeight = Math.Max(1, dstHeight >> dstLevel);
+
+            int blockWidth = 1;
+            int blockHeight = 1;
+            bool sizeInBlocks = false;
+
+            MTLBuffer tempBuffer = default;
+
+            if (srcInfo.Format != dstInfo.Format && (srcInfo.IsCompressed || dstInfo.IsCompressed))
+            {
+                // Compressed alias copies need to happen through a temporary buffer.
+                // The data is copied from the source to the buffer, then the buffer to the destination.
+                // The length of the buffer should be the maximum slice size for the destination.
+
+                tempBuffer = blitCommandEncoder.Device.NewBuffer((ulong)dstInfo.GetMipSize2D(0), MTLResourceOptions.ResourceStorageModePrivate);
+            }
+
+            // When copying from a compressed to a non-compressed format,
+            // the non-compressed texture will have the size of the texture
+            // in blocks (not in texels), so we must adjust that size to
+            // match the size in texels of the compressed texture.
+            if (!srcInfo.IsCompressed && dstInfo.IsCompressed)
+            {
+                srcWidth *= dstInfo.BlockWidth;
+                srcHeight *= dstInfo.BlockHeight;
+                blockWidth = dstInfo.BlockWidth;
+                blockHeight = dstInfo.BlockHeight;
+
+                sizeInBlocks = true;
+            }
+            else if (srcInfo.IsCompressed && !dstInfo.IsCompressed)
+            {
+                dstWidth *= srcInfo.BlockWidth;
+                dstHeight *= srcInfo.BlockHeight;
+                blockWidth = srcInfo.BlockWidth;
+                blockHeight = srcInfo.BlockHeight;
+            }
+
+            int width = Math.Min(srcWidth, dstWidth);
+            int height = Math.Min(srcHeight, dstHeight);
+
+            for (int level = 0; level < levels; level++)
+            {
+                // Stop copy if we are already out of the levels range.
+                // Both sides are checked against the absolute mip level that is passed to the blit below.
+                if (srcLevel + level >= srcInfo.Levels || dstLevel + level >= dstInfo.Levels)
+                {
+                    break;
+                }
+
+                int copyWidth = sizeInBlocks ? BitUtils.DivRoundUp(width, blockWidth) : width;
+                int copyHeight = sizeInBlocks ? BitUtils.DivRoundUp(height, blockHeight) : height;
+
+                int layers = Math.Max(dstLayers - dstLayer, srcLayers);
+
+                for (int layer = 0; layer < layers; layer++)
+                {
+                    if (tempBuffer.NativePtr != 0)
+                    {
+                        // Copy through the temp buffer
+                        CopyFromOrToBuffer(cbs, tempBuffer, srcImage, srcInfo, true, srcLayer + layer, srcLevel + level, 0, 0, copyWidth, copyHeight);
+
+                        int dstBufferWidth = sizeInBlocks ? copyWidth * blockWidth : BitUtils.DivRoundUp(copyWidth, blockWidth);
+                        int dstBufferHeight = sizeInBlocks ? copyHeight * blockHeight : BitUtils.DivRoundUp(copyHeight, blockHeight);
+
+                        CopyFromOrToBuffer(cbs, tempBuffer, dstImage, dstInfo, false, dstLayer + layer, dstLevel + level, 0, 0, dstBufferWidth, dstBufferHeight);
+                    }
+                    else if (srcInfo.Samples > 1 && srcInfo.Samples != dstInfo.Samples)
+                    {
+                        // TODO
+
+                        Logger.Warning?.PrintMsg(LogClass.Gpu, "Unsupported mismatching sample count copy");
+                    }
+                    else
+                    {
+                        blitCommandEncoder.CopyFromTexture(
+                            srcImage,
+                            (ulong)(srcLayer + layer),
+                            (ulong)(srcLevel + level),
+                            new MTLOrigin { z = (ulong)srcZ },
+                            new MTLSize { width = (ulong)copyWidth, height = (ulong)copyHeight, depth = (ulong)srcDepth },
+                            dstImage,
+                            (ulong)(dstLayer + layer),
+                            (ulong)(dstLevel + level),
+                            new MTLOrigin { z = (ulong)dstZ });
+                    }
+                }
+
+                width = Math.Max(1, width >> 1);
+                height = Math.Max(1, height >> 1);
+
+                if (srcInfo.Target == Target.Texture3D)
+                {
+                    srcDepth = Math.Max(1, srcDepth >> 1);
+                }
+            }
+
+            if (tempBuffer.NativePtr != 0)
+            {
+                tempBuffer.Dispose();
+            }
+        }
+    }
+}

+ 60 - 0
src/Ryujinx.Graphics.Metal/VertexBufferState.cs

@@ -0,0 +1,60 @@
+using Ryujinx.Graphics.GAL;
+using SharpMetal.Metal;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Immutable record of a vertex buffer binding: the buffer handle, the byte range
+    /// inside it, and the stride/divisor values stored alongside.
+    /// </summary>
+    [SupportedOSPlatform("macos")]
+    internal readonly struct VertexBufferState
+    {
+        /// <summary>
+        /// A state that refers to no buffer (null handle, all numeric fields zero).
+        /// </summary>
+        public static VertexBufferState Null => new(BufferHandle.Null, 0, 0, 0);
+
+        private readonly BufferHandle _handle;
+        private readonly int _offset;
+        private readonly int _size;
+
+        public readonly int Stride;
+        public readonly int Divisor;
+
+        /// <summary>
+        /// Creates a new vertex buffer state.
+        /// </summary>
+        /// <param name="handle">Handle of the buffer to bind</param>
+        /// <param name="offset">Offset of the vertex data inside the buffer</param>
+        /// <param name="size">Size of the vertex data range</param>
+        /// <param name="divisor">Value stored in <see cref="Divisor"/></param>
+        /// <param name="stride">Value stored in <see cref="Stride"/></param>
+        public VertexBufferState(BufferHandle handle, int offset, int size, int divisor, int stride = 0)
+        {
+            Stride = stride;
+            Divisor = divisor;
+
+            _handle = handle;
+            _offset = offset;
+            _size = size;
+        }
+
+        /// <summary>
+        /// Resolves this state to a Metal buffer and the offset to bind it at.
+        /// </summary>
+        /// <param name="bufferManager">Buffer manager used to look up the handle</param>
+        /// <param name="cbs">Command buffer passed to the buffer lookup</param>
+        /// <returns>
+        /// The buffer and offset, or a buffer wrapping <see cref="IntPtr.Zero"/> with offset 0 when the
+        /// handle is null, the lookup yields nothing, or the offset is not below the buffer size.
+        /// </returns>
+        public (MTLBuffer, int) GetVertexBuffer(BufferManager bufferManager, CommandBufferScoped cbs)
+        {
+            if (_handle == BufferHandle.Null)
+            {
+                return (new MTLBuffer(IntPtr.Zero), 0);
+            }
+
+            // TODO: Handle restride if necessary. If the stride ever gets rewritten,
+            // the original stride must be reapplied here.
+            Auto<DisposableBuffer> holder = bufferManager.GetBuffer(_handle, false, out int bufferSize);
+
+            // An offset at or past the end of the buffer leaves nothing to bind.
+            if (holder == null || _offset >= bufferSize)
+            {
+                return (new MTLBuffer(IntPtr.Zero), 0);
+            }
+
+            var mtlBuffer = holder.Get(cbs, _offset, _size).Value;
+
+            return (mtlBuffer, _offset);
+        }
+    }
+}

+ 231 - 0
src/Ryujinx.Graphics.Metal/Window.cs

@@ -0,0 +1,231 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Metal.Effects;
+using SharpMetal.ObjectiveCCore;
+using SharpMetal.QuartzCore;
+using System;
+using System.Runtime.Versioning;
+
+namespace Ryujinx.Graphics.Metal
+{
+    /// <summary>
+    /// Presents rendered textures to a <see cref="CAMetalLayer"/> and tracks the requested
+    /// drawable size, anti-aliasing mode and scaling filter.
+    /// </summary>
+    /// <remarks>
+    /// Post-processing effects and non-trivial scaling filters are not implemented for this backend yet;
+    /// selecting them only logs a warning.
+    /// </remarks>
+    [SupportedOSPlatform("macos")]
+    class Window : IWindow, IDisposable
+    {
+        public bool ScreenCaptureRequested { get; set; }
+
+        private readonly MetalRenderer _renderer;
+        private readonly CAMetalLayer _metalLayer;
+
+        private int _width;
+        private int _height;
+
+        private int _requestedWidth;
+        private int _requestedHeight;
+
+        // private bool _vsyncEnabled;
+        private AntiAliasing _currentAntiAliasing;
+        private bool _updateEffect;
+        private IPostProcessingEffect _effect;
+        private IScalingFilter _scalingFilter;
+        private bool _isLinear;
+        // private float _scalingFilterLevel;
+        private bool _updateScalingFilter;
+        private ScalingFilter _currentScalingFilter;
+        // private bool _colorSpacePassthroughEnabled;
+
+        /// <summary>
+        /// Creates a window that presents through the given Metal layer.
+        /// </summary>
+        /// <param name="renderer">Renderer whose pipeline performs the final blit</param>
+        /// <param name="metalLayer">Layer that vends the drawables to present into</param>
+        public Window(MetalRenderer renderer, CAMetalLayer metalLayer)
+        {
+            _renderer = renderer;
+            _metalLayer = metalLayer;
+        }
+
+        /// <summary>
+        /// Applies a pending size request (see <see cref="SetSize"/>) to the layer's drawable size.
+        /// </summary>
+        private unsafe void ResizeIfNeeded()
+        {
+            if (_requestedWidth != 0 && _requestedHeight != 0)
+            {
+                // TODO: This is actually a CGSize, but there is no overload for that, so fill the first two fields of rect with the size.
+                var rect = new NSRect(_requestedWidth, _requestedHeight, 0, 0);
+
+                ObjectiveC.objc_msgSend(_metalLayer, "setDrawableSize:", rect);
+
+                _requestedWidth = 0;
+                _requestedHeight = 0;
+            }
+        }
+
+        /// <summary>
+        /// Presents a texture to the next drawable of the layer.
+        /// </summary>
+        /// <param name="texture">Texture to present; ignored unless it is a Metal <see cref="Texture"/></param>
+        /// <param name="crop">Source crop rectangle, flip flags and aspect ratio</param>
+        /// <param name="swapBuffersCallback">Not used by this backend</param>
+        public unsafe void Present(ITexture texture, ImageCrop crop, Action swapBuffersCallback)
+        {
+            if (_renderer.Pipeline is Pipeline pipeline && texture is Texture tex)
+            {
+                ResizeIfNeeded();
+
+                IntPtr drawablePtr = ObjectiveC.IntPtr_objc_msgSend(_metalLayer, "nextDrawable");
+
+                if (drawablePtr == IntPtr.Zero)
+                {
+                    // The layer could not vend a drawable. Skip this frame instead of
+                    // computing extents from a zero-sized texture and presenting to nil.
+                    return;
+                }
+
+                var drawable = new CAMetalDrawable(drawablePtr);
+
+                _width = (int)drawable.Texture.Width;
+                _height = (int)drawable.Texture.Height;
+
+                UpdateEffect();
+
+                if (_effect != null)
+                {
+                    // TODO: Run Effects
+                    // view = _effect.Run()
+                }
+
+                int srcX0, srcX1, srcY0, srcY1;
+
+                // An all-zero crop on an axis means "use the full texture extent".
+                if (crop.Left == 0 && crop.Right == 0)
+                {
+                    srcX0 = 0;
+                    srcX1 = tex.Width;
+                }
+                else
+                {
+                    srcX0 = crop.Left;
+                    srcX1 = crop.Right;
+                }
+
+                if (crop.Top == 0 && crop.Bottom == 0)
+                {
+                    srcY0 = 0;
+                    srcY1 = tex.Height;
+                }
+                else
+                {
+                    srcY0 = crop.Top;
+                    srcY1 = crop.Bottom;
+                }
+
+                if (ScreenCaptureRequested)
+                {
+                    // TODO: Support screen captures
+
+                    ScreenCaptureRequested = false;
+                }
+
+                // Shrink one axis so the image keeps the requested aspect ratio, unless stretching.
+                float ratioX = crop.IsStretched ? 1.0f : MathF.Min(1.0f, _height * crop.AspectRatioX / (_width * crop.AspectRatioY));
+                float ratioY = crop.IsStretched ? 1.0f : MathF.Min(1.0f, _width * crop.AspectRatioY / (_height * crop.AspectRatioX));
+
+                int dstWidth = (int)(_width * ratioX);
+                int dstHeight = (int)(_height * ratioY);
+
+                // Center the image inside the drawable.
+                int dstPaddingX = (_width - dstWidth) / 2;
+                int dstPaddingY = (_height - dstHeight) / 2;
+
+                int dstX0 = crop.FlipX ? _width - dstPaddingX : dstPaddingX;
+                int dstX1 = crop.FlipX ? dstPaddingX : _width - dstPaddingX;
+
+                int dstY0 = crop.FlipY ? _height - dstPaddingY : dstPaddingY;
+                int dstY1 = crop.FlipY ? dstPaddingY : _height - dstPaddingY;
+
+                if (_scalingFilter != null)
+                {
+                    // TODO: Run scaling filter
+                }
+
+                pipeline.Present(
+                    drawable,
+                    tex,
+                    new Extents2D(srcX0, srcY0, srcX1, srcY1),
+                    new Extents2D(dstX0, dstY0, dstX1, dstY1),
+                    _isLinear);
+            }
+        }
+
+        /// <summary>
+        /// Records a new drawable size; it is applied on the next <see cref="Present"/>.
+        /// </summary>
+        public void SetSize(int width, int height)
+        {
+            _requestedWidth = width;
+            _requestedHeight = height;
+        }
+
+        /// <summary>
+        /// Currently a no-op for this backend.
+        /// </summary>
+        public void ChangeVSyncMode(VSyncMode vSyncMode)
+        {
+            //_vSyncMode = vSyncMode;
+        }
+
+        /// <summary>
+        /// Selects the anti-aliasing mode; the change is applied on the next <see cref="Present"/>.
+        /// </summary>
+        public void SetAntiAliasing(AntiAliasing effect)
+        {
+            if (_currentAntiAliasing == effect && _effect != null)
+            {
+                return;
+            }
+
+            _currentAntiAliasing = effect;
+
+            _updateEffect = true;
+        }
+
+        /// <summary>
+        /// Selects the scaling filter; the change is applied on the next <see cref="Present"/>.
+        /// </summary>
+        public void SetScalingFilter(ScalingFilter type)
+        {
+            // Skip only when the same filter type is requested and a filter object already exists.
+            if (_currentScalingFilter == type && _scalingFilter != null)
+            {
+                return;
+            }
+
+            _currentScalingFilter = type;
+
+            _updateScalingFilter = true;
+        }
+
+        /// <summary>
+        /// Requests a scaling filter refresh; the level itself is not stored yet.
+        /// </summary>
+        public void SetScalingFilterLevel(float level)
+        {
+            // _scalingFilterLevel = level;
+            _updateScalingFilter = true;
+        }
+
+        /// <summary>
+        /// Currently a no-op for this backend.
+        /// </summary>
+        public void SetColorSpacePassthrough(bool colorSpacePassThroughEnabled)
+        {
+            // _colorSpacePassthroughEnabled = colorSpacePassThroughEnabled;
+        }
+
+        /// <summary>
+        /// Applies pending anti-aliasing and scaling filter changes.
+        /// </summary>
+        private void UpdateEffect()
+        {
+            if (_updateEffect)
+            {
+                _updateEffect = false;
+
+                switch (_currentAntiAliasing)
+                {
+                    case AntiAliasing.Fxaa:
+                        // Clear the reference after disposing so the effect is never disposed twice.
+                        _effect?.Dispose();
+                        _effect = null;
+                        Logger.Warning?.PrintMsg(LogClass.Gpu, "FXAA not implemented for Metal backend!");
+                        break;
+                    case AntiAliasing.None:
+                        _effect?.Dispose();
+                        _effect = null;
+                        break;
+                    case AntiAliasing.SmaaLow:
+                    case AntiAliasing.SmaaMedium:
+                    case AntiAliasing.SmaaHigh:
+                    case AntiAliasing.SmaaUltra:
+                        // var quality = _currentAntiAliasing - AntiAliasing.SmaaLow;
+                        // No SMAA effect can be created, so drop whatever effect was active before.
+                        _effect?.Dispose();
+                        _effect = null;
+                        Logger.Warning?.PrintMsg(LogClass.Gpu, "SMAA not implemented for Metal backend!");
+                        break;
+                }
+            }
+
+            if (_updateScalingFilter)
+            {
+                _updateScalingFilter = false;
+
+                switch (_currentScalingFilter)
+                {
+                    case ScalingFilter.Bilinear:
+                    case ScalingFilter.Nearest:
+                        _scalingFilter?.Dispose();
+                        _scalingFilter = null;
+                        _isLinear = _currentScalingFilter == ScalingFilter.Bilinear;
+                        break;
+                    case ScalingFilter.Fsr:
+                        Logger.Warning?.PrintMsg(LogClass.Gpu, "FSR not implemented for Metal backend!");
+                        break;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Releases the active effect and scaling filter (if any) and the Metal layer.
+        /// </summary>
+        public void Dispose()
+        {
+            _effect?.Dispose();
+            _effect = null;
+
+            _scalingFilter?.Dispose();
+            _scalingFilter = null;
+
+            _metalLayer.Dispose();
+        }
+    }
+}

+ 108 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/CodeGenContext.cs

@@ -0,0 +1,108 @@
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Text;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl
+{
+    /// <summary>
+    /// Carries the inputs of one MSL code generation run and accumulates the emitted
+    /// source text, tracking the current indentation level.
+    /// </summary>
+    class CodeGenContext
+    {
+        public const string Tab = "    ";
+
+        // The number of additional arguments that every function (except for the main one) must have (for instance support_buffer)
+        public const int AdditionalArgCount = 2;
+
+        private readonly StringBuilder _sb;
+
+        private int _level;
+
+        private string _indentation;
+
+        public StructuredFunction CurrentFunction { get; set; }
+
+        public StructuredProgramInfo Info { get; }
+
+        public AttributeUsage AttributeUsage { get; }
+        public ShaderDefinitions Definitions { get; }
+        public ShaderProperties Properties { get; }
+        public HostCapabilities HostCapabilities { get; }
+        public ILogger Logger { get; }
+        public TargetApi TargetApi { get; }
+
+        public OperandManager OperandManager { get; }
+
+        /// <summary>
+        /// Creates a context for the given program, copying the relevant settings out of <paramref name="parameters"/>.
+        /// </summary>
+        /// <param name="info">Structured program being translated</param>
+        /// <param name="parameters">Code generation parameters</param>
+        public CodeGenContext(StructuredProgramInfo info, CodeGenParameters parameters)
+        {
+            _sb = new StringBuilder();
+
+            Info = info;
+
+            AttributeUsage = parameters.AttributeUsage;
+            Definitions = parameters.Definitions;
+            Properties = parameters.Properties;
+            HostCapabilities = parameters.HostCapabilities;
+            Logger = parameters.Logger;
+            TargetApi = parameters.TargetApi;
+
+            OperandManager = new OperandManager();
+        }
+
+        /// <summary>
+        /// Emits an empty line (no indentation).
+        /// </summary>
+        public void AppendLine() => _sb.AppendLine();
+
+        /// <summary>
+        /// Emits a line prefixed with the current indentation.
+        /// </summary>
+        /// <param name="str">Line content</param>
+        public void AppendLine(string str) => _sb.AppendLine(_indentation + str);
+
+        /// <summary>
+        /// Returns everything emitted so far.
+        /// </summary>
+        public string GetCode() => _sb.ToString();
+
+        /// <summary>
+        /// Emits <paramref name="prefix"/> followed by an opening brace, then indents one level deeper.
+        /// </summary>
+        public void EnterScope(string prefix = "")
+        {
+            AppendLine(prefix + "{");
+
+            _indentation = GetIndentation(++_level);
+        }
+
+        /// <summary>
+        /// Goes back one indentation level and emits a closing brace followed by <paramref name="suffix"/>.
+        /// Does nothing when no scope is open.
+        /// </summary>
+        public void LeaveScope(string suffix = "")
+        {
+            if (_level != 0)
+            {
+                _indentation = GetIndentation(--_level);
+
+                AppendLine("}" + suffix);
+            }
+        }
+
+        /// <summary>
+        /// Looks up a function of the program by its index in <see cref="StructuredProgramInfo.Functions"/>.
+        /// </summary>
+        public StructuredFunction GetFunction(int id) => Info.Functions[id];
+
+        /// <summary>
+        /// Builds the indentation string for a nesting level: <see cref="Tab"/> repeated <paramref name="level"/> times.
+        /// </summary>
+        private static string GetIndentation(int level)
+        {
+            var builder = new StringBuilder();
+
+            for (int remaining = level; remaining > 0; remaining--)
+            {
+                builder.Append(Tab);
+            }
+
+            return builder.ToString();
+        }
+    }
+}

+ 578 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Declarations.cs

@@ -0,0 +1,578 @@
+using Ryujinx.Common;
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl
+{
+    static class Declarations
+    {
+        /*
+         * Description of MSL Binding Model
+         *
+         * There are a few fundamental differences between how GLSL and MSL handle I/O.
+         * This comment will set out to describe the reasons why things are done certain ways
+         * and to describe the overall binding model that we're striving for here.
+         *
+         * Main I/O Structs
+         *
+         * Each stage has a main input and output struct (if applicable) labeled as [Stage][In/Out], e.g. VertexIn.
+         * Every field within these structs is labeled with an [[attribute(n)]] property,
+         * and the overall struct is labeled with [[stage_in]] for input structs, and defined as the
+         * output type of the main shader function for the output struct. This struct also contains special
+         * attribute-based properties like [[position]] that would be "built-ins" in a GLSL context.
+         *
+         * These structs are passed as inputs to all inline functions due to containing "built-ins"
+         * that inline functions assume access to.
+         *
+         * Vertex & Zero Buffers
+         *
+         * Binding indices 0-16 are reserved for vertex buffers, and binding 18 is reserved for the zero buffer.
+         *
+         * Uniforms & Storage Buffers
+         *
+         * Uniforms and storage buffers are tightly packed into their respective argument buffers
+         * (effectively ignoring binding indices at shader level), with each pointer to the corresponding
+         * struct that defines the layout and fields of these buffers (usually just a single data array), laid
+         * out one after the other in ascending order of their binding index.
+         *
+         * The uniforms argument buffer is always bound at a fixed index of 20.
+         * The storage buffers argument buffer is always bound at a fixed index of 21.
+         *
+         * These structs are passed as inputs to all inline functions because, in GLSL or SPIR-V,
+         * uniforms and storage buffers would be globals, and inline functions assume access to these buffers.
+         *
+         * Samplers & Textures
+         *
+         * Metal does not have a combined image sampler like sampler2D in GLSL, as a result we need to bind
+         * an individual texture and a sampler object for each instance of a combined image sampler.
+         * Samplers and textures are bound in a shared argument buffer. This argument buffer is tightly packed
+         * (effectively ignoring binding indices at shader level), with texture and their samplers (if present)
+         * laid out one after the other in ascending order of their binding index.
+         *
+         * The samplers and textures argument buffer is always bound at a fixed index of 22.
+         *
+         */
+
+        /// <summary>
+        /// Emits the file preamble and all global declarations: stage input/output structs,
+        /// buffer structs, one struct per texture set and per image set, and the helper
+        /// functions requested by the program's helper function mask.
+        /// </summary>
+        /// <param name="context">Code generation context that receives the output</param>
+        /// <param name="info">Structured program information</param>
+        /// <returns>The set indices used by textures, followed by those used only by images</returns>
+        public static int[] Declare(CodeGenContext context, StructuredProgramInfo info)
+        {
+            // TODO: Re-enable this warning
+            context.AppendLine("#pragma clang diagnostic ignored \"-Wunused-variable\"");
+            context.AppendLine();
+            context.AppendLine("#include <metal_stdlib>");
+            context.AppendLine("#include <simd/simd.h>");
+            context.AppendLine();
+            context.AppendLine("using namespace metal;");
+            context.AppendLine();
+
+            bool usesFsi = (info.HelperFunctionsMask & HelperFunctionsMask.FSI) != 0;
+
+            var userInputs = info.IoDefinitions.Where(x => IsUserDefined(x, StorageKind.Input));
+            var allOutputs = info.IoDefinitions.Where(x => x.StorageKind == StorageKind.Output);
+
+            DeclareInputAttributes(context, userInputs);
+            context.AppendLine();
+            DeclareOutputAttributes(context, allOutputs);
+            context.AppendLine();
+
+            var constantBuffers = context.Properties.ConstantBuffers.Values.OrderBy(x => x.Binding).ToArray();
+            var storageBuffers = context.Properties.StorageBuffers.Values.OrderBy(x => x.Binding).ToArray();
+
+            DeclareBufferStructures(context, constantBuffers, true, usesFsi);
+            DeclareBufferStructures(context, storageBuffers, false, usesFsi);
+
+            // We need to declare each set as a new struct
+            var texturesBySet = context.Properties.Textures.Values
+                .GroupBy(x => x.Set)
+                .ToDictionary(x => x.Key, x => x.OrderBy(y => y.Binding).ToArray());
+
+            var imagesBySet = context.Properties.Images.Values
+                .GroupBy(x => x.Set)
+                .ToDictionary(x => x.Key, x => x.OrderBy(y => y.Binding).ToArray());
+
+            var sets = texturesBySet.Keys.ToArray().Union(imagesBySet.Keys.ToArray()).ToArray();
+
+            foreach (var (setIndex, textures) in texturesBySet)
+            {
+                DeclareTextures(context, textures, setIndex);
+            }
+
+            foreach (var (setIndex, images) in imagesBySet)
+            {
+                DeclareImages(context, images, setIndex, usesFsi);
+            }
+
+            // Helper functions are appended in this fixed order, each only when its mask bit is set.
+            (HelperFunctionsMask Mask, string Path)[] helpers =
+            [
+                (HelperFunctionsMask.FindLSB, "Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindLSB.metal"),
+                (HelperFunctionsMask.FindMSBS32, "Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBS32.metal"),
+                (HelperFunctionsMask.FindMSBU32, "Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBU32.metal"),
+                (HelperFunctionsMask.SwizzleAdd, "Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/SwizzleAdd.metal"),
+                (HelperFunctionsMask.Precise, "Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/Precise.metal"),
+            ];
+
+            foreach (var (mask, path) in helpers)
+            {
+                if ((info.HelperFunctionsMask & mask) != 0)
+                {
+                    AppendHelperFunction(context, path);
+                }
+            }
+
+            return sets;
+        }
+
+        /// <summary>
+        /// Checks whether an I/O definition is a user defined variable of the given storage kind.
+        /// </summary>
+        /// <param name="ioDefinition">Definition to test</param>
+        /// <param name="storageKind">Storage kind the definition must have</param>
+        /// <returns>True when both the storage kind and the user defined variable kind match</returns>
+        static bool IsUserDefined(IoDefinition ioDefinition, StorageKind storageKind)
+            => ioDefinition.IoVariable == IoVariable.UserDefined && ioDefinition.StorageKind == storageKind;
+
+        /// <summary>
+        /// Emits the local declarations at the top of a function body.
+        /// </summary>
+        /// <remarks>
+        /// For the main function this also emits the indexed input attribute array (when input
+        /// attribute indexing is enabled), local and shared memories, the stage output variable
+        /// and a few derived built-in values.
+        /// </remarks>
+        /// <param name="context">Code generation context that receives the output</param>
+        /// <param name="function">Function whose locals are declared</param>
+        /// <param name="stage">Shader stage being generated</param>
+        /// <param name="isMainFunc">True when <paramref name="function"/> is the shader entry point</param>
+        public static void DeclareLocals(CodeGenContext context, StructuredFunction function, ShaderStage stage, bool isMainFunc = false)
+        {
+            if (isMainFunc)
+            {
+                // TODO: Support OaIndexing
+                if (context.Definitions.IaIndexing)
+                {
+                    // Gather the individual stage inputs into an array so they can be indexed dynamically.
+                    context.EnterScope($"array<float4, {Constants.MaxAttributes}> {Defaults.IAttributePrefix} = ");
+
+                    for (int attribute = 0; attribute < Constants.MaxAttributes; attribute++)
+                    {
+                        context.AppendLine($"in.{Defaults.IAttributePrefix}{attribute},");
+                    }
+
+                    context.LeaveScope(";");
+                }
+
+                DeclareMemories(context, context.Properties.LocalMemories.Values, isShared: false);
+                DeclareMemories(context, context.Properties.SharedMemories.Values, isShared: true);
+
+                if (stage == ShaderStage.Vertex)
+                {
+                    context.AppendLine("VertexOut out = {};");
+                    // TODO: Only add if necessary
+                    context.AppendLine("uint instance_index = instance_id + base_instance;");
+                }
+                else if (stage == ShaderStage.Fragment)
+                {
+                    context.AppendLine("FragmentOut out = {};");
+                }
+
+                // TODO: Only add if necessary
+                if (stage != ShaderStage.Compute)
+                {
+                    // MSL does not give us access to [[thread_index_in_simdgroup]]
+                    // outside compute. But we may still need to provide this value in frag/vert.
+                    context.AppendLine("uint thread_index_in_simdgroup = simd_prefix_exclusive_sum(1);");
+                }
+            }
+
+            foreach (AstOperand local in function.Locals)
+            {
+                string localName = context.OperandManager.DeclareLocal(local);
+                string typeName = GetVarTypeName(local.VarType);
+
+                context.AppendLine($"{typeName} {localName};");
+            }
+        }
+
+        /// <summary>
+        /// Maps an aggregate type to the name of the matching MSL type.
+        /// </summary>
+        /// <param name="type">Scalar or vector type (without the array flag)</param>
+        /// <param name="atomic">
+        /// When true, scalar S32 and U32 map to the atomic integer types; every other type is unaffected
+        /// </param>
+        /// <returns>The MSL type name</returns>
+        /// <exception cref="ArgumentException">Thrown when <paramref name="type"/> has no MSL equivalent here</exception>
+        public static string GetVarTypeName(AggregateType type, bool atomic = false)
+        {
+            if (atomic)
+            {
+                // Only the two scalar integer types have an atomic variant.
+                if (type == AggregateType.S32)
+                {
+                    return "atomic_int";
+                }
+
+                if (type == AggregateType.U32)
+                {
+                    return "atomic_uint";
+                }
+            }
+
+            return type switch
+            {
+                AggregateType.Void => "void",
+                AggregateType.Bool => "bool",
+                AggregateType.FP32 => "float",
+                AggregateType.S32 => "int",
+                AggregateType.U32 => "uint",
+                AggregateType.Vector2 | AggregateType.Bool => "bool2",
+                AggregateType.Vector2 | AggregateType.FP32 => "float2",
+                AggregateType.Vector2 | AggregateType.S32 => "int2",
+                AggregateType.Vector2 | AggregateType.U32 => "uint2",
+                AggregateType.Vector3 | AggregateType.Bool => "bool3",
+                AggregateType.Vector3 | AggregateType.FP32 => "float3",
+                AggregateType.Vector3 | AggregateType.S32 => "int3",
+                AggregateType.Vector3 | AggregateType.U32 => "uint3",
+                AggregateType.Vector4 | AggregateType.Bool => "bool4",
+                AggregateType.Vector4 | AggregateType.FP32 => "float4",
+                AggregateType.Vector4 | AggregateType.S32 => "int4",
+                AggregateType.Vector4 | AggregateType.U32 => "uint4",
+                _ => throw new ArgumentException($"Invalid variable type \"{type}\"."),
+            };
+        }
+
+        /// <summary>
+        /// Emits one variable declaration per memory definition.
+        /// </summary>
+        /// <param name="context">Code generation context that receives the output</param>
+        /// <param name="memories">Memories to declare</param>
+        /// <param name="isShared">True to prefix each declaration with the threadgroup address space</param>
+        private static void DeclareMemories(CodeGenContext context, IEnumerable<MemoryDefinition> memories, bool isShared)
+        {
+            string addressSpace = isShared ? "threadgroup " : string.Empty;
+
+            foreach (MemoryDefinition memory in memories)
+            {
+                bool isArray = (memory.Type & AggregateType.Array) != 0;
+
+                // The array flag is expressed as a "[length]" suffix, not as part of the element type.
+                string lengthSuffix = isArray ? $"[{memory.ArrayLength}]" : "";
+                string elementType = GetVarTypeName(memory.Type & ~AggregateType.Array);
+
+                context.AppendLine($"{addressSpace}{elementType} {memory.Name}{lengthSuffix};");
+            }
+        }
+
+        /// <summary>
+        /// Emits one struct per buffer describing its fields, followed by a container struct
+        /// ("ConstantBuffers" or "StorageBuffers") holding one pointer per buffer.
+        /// </summary>
+        /// <param name="context">Code generation context that receives the output</param>
+        /// <param name="buffers">Buffers to declare, in the order their pointers should appear</param>
+        /// <param name="constant">True for constant buffers, false for storage buffers</param>
+        /// <param name="fsi">True to tag storage buffer pointers with a raster order group</param>
+        private static void DeclareBufferStructures(CodeGenContext context, BufferDefinition[] buffers, bool constant, bool fsi)
+        {
+            string containerName = constant ? "ConstantBuffers" : "StorageBuffers";
+            string pointerAddressSpace = constant ? "constant" : "device";
+
+            // The raster order group attribute only applies to storage buffers.
+            string rasterOrderSuffix = !constant && fsi ? " [[raster_order_group(0)]]" : "";
+
+            var pointerMembers = new List<string>(buffers.Length);
+
+            foreach (BufferDefinition buffer in buffers)
+            {
+                string structName = $"{Defaults.StructPrefix}_{buffer.Name}";
+                bool isStd140 = buffer.Layout == BufferLayout.Std140;
+
+                pointerMembers.Add($"{pointerAddressSpace} {structName}* {buffer.Name}{rasterOrderSuffix};");
+
+                context.AppendLine($"struct {structName}");
+                context.EnterScope();
+
+                foreach (StructureField field in buffer.Type.Fields)
+                {
+                    // With the std140 layout, array fields get the Vector4 flag added to their element type.
+                    AggregateType elementType = field.Type;
+                    elementType |= (isStd140 && (field.Type & AggregateType.Array) != 0)
+                        ? AggregateType.Vector4
+                        : AggregateType.Invalid;
+
+                    elementType &= ~AggregateType.Array;
+
+                    string elementTypeName = GetVarTypeName(elementType);
+                    string lengthSuffix = "";
+
+                    if (field.Type.HasFlag(AggregateType.Array))
+                    {
+                        // Probably UB for the unsized case, but this is the approach that MVK takes
+                        lengthSuffix = field.ArrayLength > 0 ? $"[{field.ArrayLength}]" : "[1]";
+                    }
+
+                    context.AppendLine($"{elementTypeName} {field.Name}{lengthSuffix};");
+                }
+
+                context.LeaveScope(";");
+                context.AppendLine();
+            }
+
+            context.AppendLine($"struct {containerName}");
+            context.EnterScope();
+
+            foreach (string member in pointerMembers)
+            {
+                context.AppendLine(member);
+            }
+
+            context.LeaveScope(";");
+            context.AppendLine();
+        }
+
+        /// <summary>
+        /// Emits the struct for one texture set, holding a texture member and/or a sampler member per definition.
+        /// </summary>
+        /// <param name="context">Code generation context that receives the output</param>
+        /// <param name="textures">Texture definitions of the set, in declaration order</param>
+        /// <param name="set">Set index, used to derive the struct name</param>
+        private static void DeclareTextures(CodeGenContext context, TextureDefinition[] textures, int set)
+        {
+            // Definitions with more than one element are declared as MSL arrays.
+            static string WithArrayLength(string typeName, int arrayLength)
+            {
+                return arrayLength > 1 ? $"array<{typeName}, {arrayLength}>" : typeName;
+            }
+
+            context.AppendLine($"struct {GetNameForSet(set)}");
+            context.EnterScope();
+
+            var members = new List<string>();
+
+            foreach (TextureDefinition definition in textures)
+            {
+                bool hasTexture = definition.Type != SamplerType.None;
+                bool hasSampler = !definition.Separate && definition.Type != SamplerType.TextureBuffer;
+
+                if (hasTexture)
+                {
+                    string textureType = definition.Type.ToMslTextureType(definition.Format.GetComponentType());
+
+                    members.Add($"{WithArrayLength(textureType, definition.ArrayLength)} tex_{definition.Name};");
+                }
+
+                if (hasSampler)
+                {
+                    members.Add($"{WithArrayLength("sampler", definition.ArrayLength)} samp_{definition.Name};");
+                }
+            }
+
+            foreach (string member in members)
+            {
+                context.AppendLine(member);
+            }
+
+            context.LeaveScope(";");
+            context.AppendLine();
+        }
+
+        /// <summary>
+        /// Emits the struct for one image set, holding one image member per definition.
+        /// </summary>
+        /// <param name="context">Code generation context that receives the output</param>
+        /// <param name="images">Image definitions of the set, in declaration order</param>
+        /// <param name="set">Set index, used to derive the struct name</param>
+        /// <param name="fsi">True to tag every image with a raster order group</param>
+        private static void DeclareImages(CodeGenContext context, TextureDefinition[] images, int set, bool fsi)
+        {
+            context.AppendLine($"struct {GetNameForSet(set)}");
+            context.EnterScope();
+
+            string rasterOrderSuffix = fsi ? " [[raster_order_group(0)]]" : "";
+
+            string[] members = images
+                .Select(image =>
+                {
+                    string imageType = image.Type.ToMslTextureType(image.Format.GetComponentType(), true);
+
+                    // Definitions with more than one element are declared as MSL arrays.
+                    if (image.ArrayLength > 1)
+                    {
+                        imageType = $"array<{imageType}, {image.ArrayLength}>";
+                    }
+
+                    return $"{imageType} {image.Name}{rasterOrderSuffix};";
+                })
+                .ToArray();
+
+            foreach (string member in members)
+            {
+                context.AppendLine(member);
+            }
+
+            context.LeaveScope(";");
+            context.AppendLine();
+        }
+
+        /// <summary>
+        /// Emits the stage input struct (VertexIn or FragmentIn). Nothing is emitted for compute shaders.
+        /// </summary>
+        /// <remarks>
+        /// When input attribute indexing is enabled, every attribute slot is declared as a float4
+        /// and the individual user defined inputs are skipped.
+        /// </remarks>
+        /// <param name="context">Code generation context that receives the output</param>
+        /// <param name="inputs">Input definitions to declare; they are emitted ordered by location</param>
+        private static void DeclareInputAttributes(CodeGenContext context, IEnumerable<IoDefinition> inputs)
+        {
+            ShaderStage stage = context.Definitions.Stage;
+
+            if (stage == ShaderStage.Compute)
+            {
+                return;
+            }
+
+            bool isFragment = stage == ShaderStage.Fragment;
+
+            if (stage == ShaderStage.Vertex)
+            {
+                context.AppendLine("struct VertexIn");
+            }
+            else if (isFragment)
+            {
+                context.AppendLine("struct FragmentIn");
+            }
+
+            context.EnterScope();
+
+            if (isFragment)
+            {
+                // TODO: check if it's needed
+                context.AppendLine("float4 position [[position, invariant]];");
+                context.AppendLine("bool front_facing [[front_facing]];");
+                context.AppendLine("float2 point_coord [[point_coord]];");
+                context.AppendLine("uint primitive_id [[primitive_id]];");
+            }
+
+            bool indexed = context.Definitions.IaIndexing;
+
+            if (indexed)
+            {
+                // MSL does not support arrays in stage I/O
+                // We need to use the SPIRV-Cross workaround
+                for (int slot = 0; slot < Constants.MaxAttributes; slot++)
+                {
+                    string slotAttribute = isFragment ? $"[[user(loc{slot})]]" : $"[[attribute({slot})]]";
+                    context.AppendLine($"float4 {Defaults.IAttributePrefix}{slot} {slotAttribute};");
+                }
+            }
+
+            foreach (IoDefinition input in inputs.OrderBy(x => x.Location))
+            {
+                // Already covered by the per-slot float4 declarations above.
+                if (indexed && input.IoVariable == IoVariable.UserDefined)
+                {
+                    continue;
+                }
+
+                var location = input.Location;
+
+                // Interpolation qualifier, only meaningful for fragment inputs.
+                string interpolation = string.Empty;
+
+                if (isFragment)
+                {
+                    interpolation = context.Definitions.ImapTypes[location].GetFirstUsedType() switch
+                    {
+                        PixelImap.Constant => "[[flat]] ",
+                        PixelImap.ScreenLinear => "[[center_no_perspective]] ",
+                        _ => string.Empty,
+                    };
+                }
+
+                string UserTypeName() => GetVarTypeName(context.Definitions.GetUserDefinedType(location, isOutput: false));
+                string UserName() => $"{Defaults.IAttributePrefix}{location}";
+
+                // Position and point coord have no entry here; for fragment shaders they are
+                // part of the fixed fields declared above.
+                (string type, string name, string attribute) = input.IoVariable switch
+                {
+                    IoVariable.GlobalId => ("uint3", "global_id", "[[thread_position_in_grid]]"),
+                    IoVariable.VertexId => ("uint", "vertex_id", "[[vertex_id]]"),
+                    // TODO: Avoid potential redeclaration
+                    IoVariable.VertexIndex => ("uint", "vertex_index", "[[vertex_id]]"),
+                    IoVariable.UserDefined => (
+                        UserTypeName(),
+                        UserName(),
+                        isFragment ? $"[[user(loc{location})]]" : $"[[attribute({location})]]"),
+                    _ => (UserTypeName(), UserName(), ""),
+                };
+
+                context.AppendLine($"{type} {name} {interpolation}{attribute};");
+            }
+
+            context.LeaveScope(";");
+        }
+
+        /// <summary>
+        /// Writes the MSL struct holding the outputs of the current shader stage.
+        /// </summary>
+        /// <param name="context">Code generation context the declaration is appended to</param>
+        /// <param name="outputs">Output definitions to declare as members of the struct</param>
+        private static void DeclareOutputAttributes(CodeGenContext context, IEnumerable<IoDefinition> outputs)
+        {
+            ShaderStage stage = context.Definitions.Stage;
+
+            // Only these three stages get a struct header, any other stage just gets the scope.
+            if (stage == ShaderStage.Vertex)
+            {
+                context.AppendLine("struct VertexOut");
+            }
+            else if (stage == ShaderStage.Fragment)
+            {
+                context.AppendLine("struct FragmentOut");
+            }
+            else if (stage == ShaderStage.Compute)
+            {
+                context.AppendLine("struct KernelOut");
+            }
+
+            context.EnterScope();
+
+            if (context.Definitions.OaIndexing)
+            {
+                // MSL does not support arrays in stage I/O
+                // We need to use the SPIRV-Cross workaround
+                for (int location = 0; location < Constants.MaxAttributes; location++)
+                {
+                    context.AppendLine($"float4 {Defaults.OAttributePrefix}{location} [[user(loc{location})]];");
+                }
+            }
+
+            if (outputs.Any())
+            {
+                IEnumerable<IoDefinition> remaining = outputs.OrderBy(x => x.Location);
+
+                if (stage == ShaderStage.Fragment && context.Definitions.DualSourceBlend)
+                {
+                    // The first two outputs (by location) become index 0 and index 1 of the same color attachment.
+                    IoDefinition primary = remaining.ElementAtOrDefault(0);
+                    IoDefinition secondary = remaining.ElementAtOrDefault(1);
+
+                    string primaryType = GetVarTypeName(context.Definitions.GetFragmentOutputColorType(primary.Location));
+                    string secondaryType = GetVarTypeName(context.Definitions.GetFragmentOutputColorType(secondary.Location));
+
+                    context.AppendLine($"{primaryType} color{primary.Location} [[color({primary.Location}), index(0)]];");
+                    context.AppendLine($"{secondaryType} color{primary.Location + 1} [[color({primary.Location}), index(1)]];");
+
+                    remaining = remaining.Skip(2);
+                }
+
+                foreach (IoDefinition output in remaining)
+                {
+                    // User defined outputs were already declared by the indexing workaround above.
+                    if (context.Definitions.OaIndexing && output.IoVariable == IoVariable.UserDefined)
+                    {
+                        continue;
+                    }
+
+                    (string type, string name, string suffix) = output.IoVariable switch
+                    {
+                        IoVariable.Position => ("float4", "position", "[[position, invariant]]"),
+                        IoVariable.PointSize => ("float", "point_size", "[[point_size]]"),
+                        IoVariable.FragmentOutputColor => (
+                            GetVarTypeName(context.Definitions.GetFragmentOutputColorType(output.Location)),
+                            $"color{output.Location}",
+                            $"[[color({output.Location})]]"),
+                        IoVariable.FragmentOutputDepth => ("float", "depth", "[[depth(any)]]"),
+                        IoVariable.ClipDistance => ("float", "clip_distance", $"[[clip_distance]][{Defaults.TotalClipDistances}]"),
+                        IoVariable.UserDefined => (
+                            GetVarTypeName(context.Definitions.GetUserDefinedType(output.Location, isOutput: true)),
+                            $"{Defaults.OAttributePrefix}{output.Location}",
+                            $"[[user(loc{output.Location})]]"),
+                        _ => (
+                            GetVarTypeName(context.Definitions.GetUserDefinedType(output.Location, isOutput: true)),
+                            $"{Defaults.OAttributePrefix}{output.Location}",
+                            ""),
+                    };
+
+                    context.AppendLine($"{type} {name} {suffix};");
+                }
+            }
+
+            context.LeaveScope(";");
+        }
+
+        /// <summary>
+        /// Appends the contents of an embedded helper source file, followed by an empty line.
+        /// </summary>
+        /// <param name="context">Code generation context the helper is appended to</param>
+        /// <param name="filename">Name of the embedded resource to read</param>
+        private static void AppendHelperFunction(CodeGenContext context, string filename)
+        {
+            // Tabs in the resource are replaced with the indentation unit used by the generator.
+            string helperSource = EmbeddedResources.ReadAllText(filename).Replace("\t", CodeGenContext.Tab);
+
+            context.AppendLine(helperSource);
+            context.AppendLine();
+        }
+
+        /// <summary>
+        /// Gets the identifier used for a resource set.
+        /// </summary>
+        /// <param name="set">Index of the set</param>
+        /// <param name="forVar">True to get the lower case (variable) spelling, false for the capitalized one</param>
+        /// <returns>"textures"/"Textures" or "images"/"Images" for those sets, otherwise "set"/"Set" followed by the index</returns>
+        public static string GetNameForSet(int set, bool forVar = false)
+        {
+            switch ((uint)set)
+            {
+                case Defaults.TexturesSetIndex:
+                    return forVar ? "textures" : "Textures";
+                case Defaults.ImagesSetIndex:
+                    return forVar ? "images" : "Images";
+                default:
+                    {
+                        string prefix = forVar ? "set" : "Set";
+
+                        return $"{prefix}{set}";
+                    }
+            }
+        }
+    }
+}

+ 34 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Defaults.cs

@@ -0,0 +1,34 @@
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl
+{
+    /// <summary>
+    /// Compile-time constants shared by the MSL code generator: identifier prefixes and binding indices.
+    /// </summary>
+    static class Defaults
+    {
+        // NOTE(review): presumably the prefix of generated local variable names, confirm against the operand manager.
+        public const string LocalNamePrefix = "temp";
+
+        // NOTE(review): presumably the prefix of per-patch attribute names, no use is visible in this part of the file.
+        public const string PerPatchAttributePrefix = "patchAttr";
+        // Prefix of input attribute member names, the attribute location is appended to it.
+        public const string IAttributePrefix = "inAttr";
+        // Prefix of output attribute member names, the attribute location is appended to it.
+        public const string OAttributePrefix = "outAttr";
+
+        public const string StructPrefix = "struct";
+
+        // NOTE(review): presumably the prefix of function argument names, confirm at the use site.
+        public const string ArgumentNamePrefix = "a";
+
+        // NOTE(review): presumably the expression emitted for undefined values, confirm at the use site.
+        public const string UndefinedName = "0";
+
+        public const int MaxVertexBuffers = 16;
+
+        // The indices below are laid out right after MaxVertexBuffers:
+        // ZeroBufferIndex is MaxVertexBuffers itself and BaseSetIndex is the one following it.
+        // NOTE(review): assumes vertex buffers occupy indices 0..MaxVertexBuffers-1, TODO confirm.
+        public const uint ZeroBufferIndex = MaxVertexBuffers;
+        public const uint BaseSetIndex = MaxVertexBuffers + 1;
+
+        // One index per resource kind, consecutive starting at BaseSetIndex.
+        public const uint ConstantBuffersIndex = BaseSetIndex;
+        public const uint StorageBuffersIndex = BaseSetIndex + 1;
+        public const uint TexturesIndex = BaseSetIndex + 2;
+        public const uint ImagesIndex = BaseSetIndex + 3;
+
+        // Set numbers of each resource kind; the textures and images ones get dedicated names in GetNameForSet.
+        public const uint ConstantBuffersSetIndex = 0;
+        public const uint StorageBuffersSetIndex = 1;
+        public const uint TexturesSetIndex = 2;
+        public const uint ImagesSetIndex = 3;
+
+        // Array length used when declaring the clip_distance output.
+        public const int TotalClipDistances = 8;
+    }
+}

+ 5 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindLSB.metal

@@ -0,0 +1,5 @@
+// Returns the index of the least significant set bit of x, or -1 when x is zero.
+template<typename T>
+inline T findLSB(T x)
+{
+    const T notFound = T(-1);
+    const T trailingZeros = ctz(x);
+
+    return select(trailingZeros, notFound, x == T(0));
+}

+ 5 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBS32.metal

@@ -0,0 +1,5 @@
+// Signed find-most-significant-bit, following the GLSL findMSB() rules for signed integers:
+// for positive x the index of the highest 1 bit, for negative x the index of the highest 0 bit,
+// and -1 when x is 0 or -1.
+template<typename T>
+inline T findMSBS32(T x)
+{
+    // A negative input is replaced by its bitwise complement (T(-1) - x),
+    // so that searching for the highest 1 bit of v finds the highest 0 bit of x.
+    T v = select(x, T(-1) - x, x < T(0));
+    return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));
+}

+ 6 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/FindMSBU32.metal

@@ -0,0 +1,6 @@
+// Returns the index of the most significant set bit of x, or -1 when there is none.
+// NOTE(review): the x < 0 branch can only trigger when T is a signed type.
+template<typename T>
+inline T findMSBU32(T x)
+{
+    // Negative inputs are replaced by T(-1) - x before the search.
+    const T searched = select(x, T(-1) - x, x < T(0));
+    const T bitWidth = clz(T(0));
+    const T topBit = bitWidth - (clz(searched) + T(1));
+
+    return select(topBit, T(-1), searched == T(0));
+}

+ 10 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/HelperFunctionNames.cs

@@ -0,0 +1,10 @@
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl
+{
+    /// <summary>
+    /// Names of the helper functions that the MSL code generator refers to by name.
+    /// </summary>
+    /// <remarks>
+    /// These are constants rather than mutable static fields, so that no code can
+    /// reassign a name after the instruction table has captured it.
+    /// </remarks>
+    static class HelperFunctionNames
+    {
+        public const string FindLSB = "findLSB";
+        public const string FindMSBS32 = "findMSBS32";
+        public const string FindMSBU32 = "findMSBU32";
+        public const string SwizzleAdd = "swizzleAdd";
+    }
+}

+ 14 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/Precise.metal

@@ -0,0 +1,14 @@
+// Computes l + r, written as fma(1, l, r).
+// The function is marked optnone so the compiler does not optimize its body;
+// presumably this keeps the addition from being merged with or reordered against
+// neighbouring operations - TODO confirm the exact motivation.
+template<typename T>
+[[clang::optnone]] T PreciseFAdd(T l, T r) {
+    return fma(T(1), l, r);
+}
+
+// Computes l - r, written as fma(-1, r, l).
+// The function is marked optnone so the compiler does not optimize its body.
+template<typename T>
+[[clang::optnone]] T PreciseFSub(T l, T r) {
+    return fma(T(-1), r, l);
+}
+
+// Computes l * r, written as fma(l, r, 0).
+// The function is marked optnone so the compiler does not optimize its body.
+template<typename T>
+[[clang::optnone]] T PreciseFMul(T l, T r) {
+    return fma(l, r, T(0));
+}

+ 7 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/HelperFunctions/SwizzleAdd.metal

@@ -0,0 +1,7 @@
+// Returns x * sx + y * sy, where the pair of factors (sx, sy) is picked from a four entry table.
+// The table entry is selected by a 2-bit field of mask; which field is used depends on
+// the two low bits of thread_index_in_simdgroup.
+float swizzleAdd(float x, float y, int mask, uint thread_index_in_simdgroup)
+{
+    const float4 xFactors = float4(1.0, -1.0, 1.0, 0.0);
+    const float4 yFactors = float4(1.0, 1.0, -1.0, 1.0);
+
+    const int fieldShift = int(thread_index_in_simdgroup & 3u) * 2;
+    const int selector = (mask >> fieldShift) & 3;
+
+    return x * xFactors[selector] + y * yFactors[selector];
+}

+ 185 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGen.cs

@@ -0,0 +1,185 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Text;
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenBallot;
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenBarrier;
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenCall;
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenHelper;
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenMemory;
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenVector;
+using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions
+{
+    static class InstGen
+    {
+        /// <summary>
+        /// Gets the MSL expression for an AST node.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="node">Operation or operand to generate the expression for</param>
+        /// <returns>The generated expression</returns>
+        /// <exception cref="ArgumentException">The node is null, or neither an operation nor an operand</exception>
+        public static string GetExpression(CodeGenContext context, IAstNode node)
+        {
+            return node switch
+            {
+                AstOperation operation => GetExpression(context, operation),
+                AstOperand operand => context.OperandManager.GetExpression(context, operand),
+                _ => throw new ArgumentException($"Invalid node type \"{node?.GetType().Name ?? "null"}\"."),
+            };
+        }
+
+        /// <summary>
+        /// Gets the MSL expression for an operation.
+        /// </summary>
+        /// <remarks>
+        /// The instruction table decides the shape of the result: a function call,
+        /// an operator expression, or the output of one of the dedicated generators.
+        /// </remarks>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Operation to generate the expression for</param>
+        /// <returns>The generated expression, or a placeholder text for instructions missing from the table</returns>
+        private static string GetExpression(CodeGenContext context, AstOperation operation)
+        {
+            Instruction inst = operation.Inst;
+
+            InstInfo info = GetInstructionInfo(inst);
+
+            if ((info.Type & InstType.Call) != 0)
+            {
+                bool atomic = (info.Type & InstType.Atomic) != 0;
+
+                int arity = (int)(info.Type & InstType.ArityMask);
+
+                StringBuilder builder = new();
+
+                if (atomic && (operation.StorageKind == StorageKind.StorageBuffer || operation.StorageKind == StorageKind.SharedMemory))
+                {
+                    AggregateType dstType = operation.Inst == Instruction.AtomicMaxS32 || operation.Inst == Instruction.AtomicMinS32
+                        ? AggregateType.S32
+                        : AggregateType.U32;
+
+                    var shared = operation.StorageKind == StorageKind.SharedMemory;
+
+                    // The first argument is the address of the accessed location, cast to a pointer to the atomic type.
+                    builder.Append($"({(shared ? "threadgroup" : "device")} {Declarations.GetVarTypeName(dstType, true)}*)&{GenerateLoadOrStore(context, operation, isStore: false)}");
+
+                    for (int argIndex = operation.SourcesCount - arity + 2; argIndex < operation.SourcesCount; argIndex++)
+                    {
+                        builder.Append($", {GetSourceExpr(context, operation.GetSource(argIndex), dstType)}, memory_order_relaxed");
+                    }
+                }
+                else
+                {
+                    for (int argIndex = 0; argIndex < arity; argIndex++)
+                    {
+                        if (argIndex != 0)
+                        {
+                            builder.Append(", ");
+                        }
+
+                        AggregateType dstType = GetSrcVarType(inst, argIndex);
+
+                        builder.Append(GetSourceExpr(context, operation.GetSource(argIndex), dstType));
+                    }
+
+                    if ((operation.Inst & Instruction.Mask) == Instruction.SwizzleAdd)
+                    {
+                        // SwizzleAdd takes one last argument, the thread_index_in_simdgroup
+                        builder.Append(", thread_index_in_simdgroup");
+                    }
+                }
+
+                return $"{info.OpName}({builder})";
+            }
+            else if ((info.Type & InstType.Op) != 0)
+            {
+                string op = info.OpName;
+
+                if (inst == Instruction.Return && operation.SourcesCount != 0)
+                {
+                    return $"{op} {GetSourceExpr(context, operation.GetSource(0), context.CurrentFunction.ReturnType)}";
+                }
+                if (inst == Instruction.Return && context.Definitions.Stage is ShaderStage.Vertex or ShaderStage.Fragment)
+                {
+                    return $"{op} out";
+                }
+
+                int arity = (int)(info.Type & InstType.ArityMask);
+
+                string[] expr = new string[arity];
+
+                for (int index = 0; index < arity; index++)
+                {
+                    IAstNode src = operation.GetSource(index);
+
+                    string srcExpr = GetSourceExpr(context, src, GetSrcVarType(inst, index));
+
+                    bool isLhs = arity == 2 && index == 0;
+
+                    expr[index] = Enclose(srcExpr, src, inst, info, isLhs);
+                }
+
+                switch (arity)
+                {
+                    case 0:
+                        return op;
+
+                    case 1:
+                        return op + expr[0];
+
+                    case 2:
+                        if (operation.ForcePrecise)
+                        {
+                            // Only these operations have a precise helper function.
+                            // Any other operation falls back to the plain operator below instead of
+                            // failing with a SwitchExpressionException.
+                            string func = (inst & Instruction.Mask) switch
+                            {
+                                Instruction.Add => "PreciseFAdd",
+                                Instruction.Subtract => "PreciseFSub",
+                                Instruction.Multiply => "PreciseFMul",
+                                _ => null,
+                            };
+
+                            if (func != null)
+                            {
+                                return $"{func}({expr[0]}, {expr[1]})";
+                            }
+                        }
+
+                        return $"{expr[0]} {op} {expr[1]}";
+
+                    case 3:
+                        // The operator name holds both symbols of the ternary operator.
+                        return $"{expr[0]} {op[0]} {expr[1]} {op[1]} {expr[2]}";
+                }
+            }
+            else if ((info.Type & InstType.Special) != 0)
+            {
+                switch (inst & Instruction.Mask)
+                {
+                    case Instruction.Ballot:
+                        return Ballot(context, operation);
+                    case Instruction.Call:
+                        return Call(context, operation);
+                    case Instruction.FSIBegin:
+                    case Instruction.FSIEnd:
+                        return "// FSI implemented with raster order groups in MSL";
+                    case Instruction.GroupMemoryBarrier:
+                    case Instruction.MemoryBarrier:
+                    case Instruction.Barrier:
+                        return Barrier(context, operation);
+                    case Instruction.ImageLoad:
+                    case Instruction.ImageStore:
+                    case Instruction.ImageAtomic:
+                        return ImageLoadOrStore(context, operation);
+                    case Instruction.Load:
+                        return Load(context, operation);
+                    case Instruction.Lod:
+                        return Lod(context, operation);
+                    case Instruction.Store:
+                        return Store(context, operation);
+                    case Instruction.TextureSample:
+                        return TextureSample(context, operation);
+                    case Instruction.TextureQuerySamples:
+                        return TextureQuerySamples(context, operation);
+                    case Instruction.TextureQuerySize:
+                        return TextureQuerySize(context, operation);
+                    case Instruction.PackHalf2x16:
+                        return PackHalf2x16(context, operation);
+                    case Instruction.UnpackHalf2x16:
+                        return UnpackHalf2x16(context, operation);
+                    case Instruction.VectorExtract:
+                        return VectorExtract(context, operation);
+                    case Instruction.VoteAllEqual:
+                        return VoteAllEqual(context, operation);
+                }
+            }
+
+            // TODO: Return this to being an error
+            return $"Unexpected instruction type \"{info.Type}\".";
+        }
+    }
+}

+ 30 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenBallot.cs

@@ -0,0 +1,30 @@
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenHelper;
+using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions
+{
+    /// <summary>
+    /// Expression generators for the ballot and vote instructions.
+    /// </summary>
+    static class InstGenBallot
+    {
+        /// <summary>
+        /// Generates the expression for a ballot, selecting one component of the result.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Ballot operation, its index selects the x, y, z or w component</param>
+        /// <returns>The generated expression</returns>
+        public static string Ballot(CodeGenContext context, AstOperation operation)
+        {
+            AggregateType dstType = GetSrcVarType(operation.Inst, 0);
+
+            string arg = GetSourceExpr(context, operation.GetSource(0), dstType);
+            char component = "xyzw"[operation.Index];
+
+            // The vote value is reinterpreted as two uints, the last two components are always zero.
+            return $"uint4(as_type<uint2>((simd_vote::vote_t)simd_ballot({arg})), 0, 0).{component}";
+        }
+
+        /// <summary>
+        /// Generates the expression that tests if the predicate is the same on all threads of the SIMD group.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Vote operation</param>
+        /// <returns>The generated expression, enclosed in parentheses</returns>
+        public static string VoteAllEqual(CodeGenContext context, AstOperation operation)
+        {
+            AggregateType dstType = GetSrcVarType(operation.Inst, 0);
+
+            string arg = GetSourceExpr(context, operation.GetSource(0), dstType);
+
+            // The result contains a binary operator, and callers never enclose the result of special
+            // instructions, so it must carry its own parentheses to stay intact inside
+            // a larger expression (for example when it is negated or combined with &&).
+            return $"(simd_all({arg}) || !simd_any({arg}))";
+        }
+    }
+}

+ 15 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenBarrier.cs

@@ -0,0 +1,15 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions
+{
+    /// <summary>
+    /// Expression generator for the barrier instructions.
+    /// </summary>
+    static class InstGenBarrier
+    {
+        /// <summary>
+        /// Generates a threadgroup_barrier call for a barrier operation.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Barrier operation</param>
+        /// <returns>The call, using the device memory flag for MemoryBarrier and the threadgroup one otherwise</returns>
+        public static string Barrier(CodeGenContext context, AstOperation operation)
+        {
+            Instruction baseInst = operation.Inst & Instruction.Mask;
+            string memoryScope = baseInst == Instruction.MemoryBarrier ? "device" : "threadgroup";
+
+            return $"threadgroup_barrier(mem_flags::mem_{memoryScope})";
+        }
+    }
+}

+ 60 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenCall.cs

@@ -0,0 +1,60 @@
+using Ryujinx.Graphics.Shader.StructuredIr;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenHelper;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions
+{
+    /// <summary>
+    /// Expression generator for function calls.
+    /// </summary>
+    static class InstGenCall
+    {
+        /// <summary>
+        /// Generates a call to a shader function.
+        /// </summary>
+        /// <remarks>
+        /// The argument list starts with the implicit arguments ("in" outside of compute,
+        /// the constant and storage buffers, and the thread index for functions with
+        /// "Shuffle" in their name), followed by the arguments of the operation.
+        /// </remarks>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Call operation, the first source is the function id</param>
+        /// <returns>The generated call expression</returns>
+        public static string Call(CodeGenContext context, AstOperation operation)
+        {
+            AstOperand funcId = (AstOperand)operation.GetSource(0);
+
+            var function = context.GetFunction(funcId.Value);
+
+            bool isCompute = context.Definitions.Stage == ShaderStage.Compute;
+            int explicitCount = operation.SourcesCount - 1;
+
+            // TODO: Replace this with a proper flag
+            bool needsThreadIndex = function.Name.Contains("Shuffle");
+
+            int implicitCount = CodeGenContext.AdditionalArgCount + (isCompute ? 0 : 1) + (needsThreadIndex ? 1 : 0);
+
+            string[] args = new string[explicitCount + implicitCount];
+
+            // Additional arguments
+            int slot = 0;
+
+            if (!isCompute)
+            {
+                args[slot++] = "in";
+            }
+
+            args[slot++] = "constant_buffers";
+            args[slot++] = "storage_buffers";
+
+            if (needsThreadIndex)
+            {
+                args[slot++] = "thread_index_in_simdgroup";
+            }
+
+            // Arguments of the operation, placed after all the implicit ones
+            for (int source = 1; source <= explicitCount; source++)
+            {
+                args[implicitCount + source - 1] = GetSourceExpr(context, operation.GetSource(source), function.GetArgumentType(source - 1));
+            }
+
+            return $"{function.Name}({string.Join(", ", args)})";
+        }
+    }
+}

+ 222 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenHelper.cs

@@ -0,0 +1,222 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.TypeConversion;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions
+{
+    static class InstGenHelper
+    {
+        private static readonly InstInfo[] _infoTable;
+
+        /// <summary>
+        /// Fills the instruction table with the kind, MSL name and precedence of every supported instruction.
+        /// Instructions registered with type 0 have no MSL equivalent.
+        /// </summary>
+        static InstGenHelper()
+        {
+            _infoTable = new InstInfo[(int)Instruction.Count];
+
+#pragma warning disable IDE0055 // Disable formatting
+            Add(Instruction.AtomicAdd,                InstType.AtomicBinary,   "atomic_fetch_add_explicit");
+            Add(Instruction.AtomicAnd,                InstType.AtomicBinary,   "atomic_fetch_and_explicit");
+            Add(Instruction.AtomicCompareAndSwap,     InstType.AtomicBinary,   "atomic_compare_exchange_weak_explicit");
+            // The signed variants share the MSL function with the unsigned ones,
+            // the expression generator picks the signed atomic type for them.
+            Add(Instruction.AtomicMaxS32,             InstType.AtomicBinary,   "atomic_fetch_max_explicit");
+            Add(Instruction.AtomicMaxU32,             InstType.AtomicBinary,   "atomic_fetch_max_explicit");
+            Add(Instruction.AtomicMinS32,             InstType.AtomicBinary,   "atomic_fetch_min_explicit");
+            Add(Instruction.AtomicMinU32,             InstType.AtomicBinary,   "atomic_fetch_min_explicit");
+            Add(Instruction.AtomicOr,                 InstType.AtomicBinary,   "atomic_fetch_or_explicit");
+            Add(Instruction.AtomicSwap,               InstType.AtomicBinary,   "atomic_exchange_explicit");
+            Add(Instruction.AtomicXor,                InstType.AtomicBinary,   "atomic_fetch_xor_explicit");
+            Add(Instruction.Absolute,                 InstType.CallUnary,      "abs");
+            Add(Instruction.Add,                      InstType.OpBinaryCom,    "+",  2);
+            Add(Instruction.Ballot,                   InstType.Special);
+            Add(Instruction.Barrier,                  InstType.Special);
+            Add(Instruction.BitCount,                 InstType.CallUnary,      "popcount");
+            Add(Instruction.BitfieldExtractS32,       InstType.CallTernary,    "extract_bits");
+            Add(Instruction.BitfieldExtractU32,       InstType.CallTernary,    "extract_bits");
+            Add(Instruction.BitfieldInsert,           InstType.CallQuaternary, "insert_bits");
+            Add(Instruction.BitfieldReverse,          InstType.CallUnary,      "reverse_bits");
+            Add(Instruction.BitwiseAnd,               InstType.OpBinaryCom,    "&",  6);
+            Add(Instruction.BitwiseExclusiveOr,       InstType.OpBinaryCom,    "^",  7);
+            Add(Instruction.BitwiseNot,               InstType.OpUnary,        "~",  0);
+            Add(Instruction.BitwiseOr,                InstType.OpBinaryCom,    "|",  8);
+            Add(Instruction.Call,                     InstType.Special);
+            Add(Instruction.Ceiling,                  InstType.CallUnary,      "ceil");
+            Add(Instruction.Clamp,                    InstType.CallTernary,    "clamp");
+            Add(Instruction.ClampU32,                 InstType.CallTernary,    "clamp");
+            Add(Instruction.CompareEqual,             InstType.OpBinaryCom,    "==", 5);
+            Add(Instruction.CompareGreater,           InstType.OpBinary,       ">",  4);
+            Add(Instruction.CompareGreaterOrEqual,    InstType.OpBinary,       ">=", 4);
+            Add(Instruction.CompareGreaterOrEqualU32, InstType.OpBinary,       ">=", 4);
+            Add(Instruction.CompareGreaterU32,        InstType.OpBinary,       ">",  4);
+            Add(Instruction.CompareLess,              InstType.OpBinary,       "<",  4);
+            Add(Instruction.CompareLessOrEqual,       InstType.OpBinary,       "<=", 4);
+            Add(Instruction.CompareLessOrEqualU32,    InstType.OpBinary,       "<=", 4);
+            Add(Instruction.CompareLessU32,           InstType.OpBinary,       "<",  4);
+            Add(Instruction.CompareNotEqual,          InstType.OpBinaryCom,    "!=", 5);
+            Add(Instruction.ConditionalSelect,        InstType.OpTernary,      "?:", 12);
+            Add(Instruction.ConvertFP32ToFP64,        0); // MSL does not have a 64-bit FP
+            Add(Instruction.ConvertFP64ToFP32,        0); // MSL does not have a 64-bit FP
+            Add(Instruction.ConvertFP32ToS32,         InstType.CallUnary,      "int");
+            Add(Instruction.ConvertFP32ToU32,         InstType.CallUnary,      "uint");
+            Add(Instruction.ConvertFP64ToS32,         0); // MSL does not have a 64-bit FP
+            Add(Instruction.ConvertFP64ToU32,         0); // MSL does not have a 64-bit FP
+            Add(Instruction.ConvertS32ToFP32,         InstType.CallUnary,      "float");
+            Add(Instruction.ConvertS32ToFP64,         0); // MSL does not have a 64-bit FP
+            Add(Instruction.ConvertU32ToFP32,         InstType.CallUnary,      "float");
+            Add(Instruction.ConvertU32ToFP64,         0); // MSL does not have a 64-bit FP
+            Add(Instruction.Cosine,                   InstType.CallUnary,      "cos");
+            Add(Instruction.Ddx,                      InstType.CallUnary,      "dfdx");
+            Add(Instruction.Ddy,                      InstType.CallUnary,      "dfdy");
+            Add(Instruction.Discard,                  InstType.CallNullary,    "discard_fragment");
+            Add(Instruction.Divide,                   InstType.OpBinary,       "/",  1);
+            Add(Instruction.EmitVertex,               0); // MSL does not have geometry shaders
+            Add(Instruction.EndPrimitive,             0); // MSL does not have geometry shaders
+            Add(Instruction.ExponentB2,               InstType.CallUnary,      "exp2");
+            Add(Instruction.FSIBegin,                 InstType.Special);
+            Add(Instruction.FSIEnd,                   InstType.Special);
+            Add(Instruction.FindLSB,                  InstType.CallUnary,      HelperFunctionNames.FindLSB);
+            Add(Instruction.FindMSBS32,               InstType.CallUnary,      HelperFunctionNames.FindMSBS32);
+            Add(Instruction.FindMSBU32,               InstType.CallUnary,      HelperFunctionNames.FindMSBU32);
+            Add(Instruction.Floor,                    InstType.CallUnary,      "floor");
+            Add(Instruction.FusedMultiplyAdd,         InstType.CallTernary,    "fma");
+            Add(Instruction.GroupMemoryBarrier,       InstType.Special);
+            Add(Instruction.ImageLoad,                InstType.Special);
+            Add(Instruction.ImageStore,               InstType.Special);
+            Add(Instruction.ImageAtomic,              InstType.Special); // Metal 3.1+
+            Add(Instruction.IsNan,                    InstType.CallUnary,      "isnan");
+            Add(Instruction.Load,                     InstType.Special);
+            Add(Instruction.Lod,                      InstType.Special);
+            Add(Instruction.LogarithmB2,              InstType.CallUnary,      "log2");
+            Add(Instruction.LogicalAnd,               InstType.OpBinaryCom,    "&&", 9);
+            Add(Instruction.LogicalExclusiveOr,       InstType.OpBinaryCom,    "^",  10);
+            Add(Instruction.LogicalNot,               InstType.OpUnary,        "!",  0);
+            Add(Instruction.LogicalOr,                InstType.OpBinaryCom,    "||", 11);
+            Add(Instruction.LoopBreak,                InstType.OpNullary,      "break");
+            Add(Instruction.LoopContinue,             InstType.OpNullary,      "continue");
+            Add(Instruction.PackDouble2x32,           0); // MSL does not have a 64-bit FP
+            Add(Instruction.PackHalf2x16,             InstType.Special);
+            Add(Instruction.Maximum,                  InstType.CallBinary,     "max");
+            Add(Instruction.MaximumU32,               InstType.CallBinary,     "max");
+            Add(Instruction.MemoryBarrier,            InstType.Special);
+            Add(Instruction.Minimum,                  InstType.CallBinary,     "min");
+            Add(Instruction.MinimumU32,               InstType.CallBinary,     "min");
+            Add(Instruction.Modulo,                   InstType.CallBinary,     "fmod");
+            Add(Instruction.Multiply,                 InstType.OpBinaryCom,    "*",  1);
+            Add(Instruction.MultiplyHighS32,          InstType.CallBinary,     "mulhi");
+            Add(Instruction.MultiplyHighU32,          InstType.CallBinary,     "mulhi");
+            Add(Instruction.Negate,                   InstType.OpUnary,        "-");
+            Add(Instruction.ReciprocalSquareRoot,     InstType.CallUnary,      "rsqrt");
+            Add(Instruction.Return,                   InstType.OpNullary,      "return");
+            Add(Instruction.Round,                    InstType.CallUnary,      "round");
+            Add(Instruction.ShiftLeft,                InstType.OpBinary,       "<<", 3);
+            Add(Instruction.ShiftRightS32,            InstType.OpBinary,       ">>", 3);
+            Add(Instruction.ShiftRightU32,            InstType.OpBinary,       ">>", 3);
+            Add(Instruction.Shuffle,                  InstType.CallBinary,     "simd_shuffle");
+            Add(Instruction.ShuffleDown,              InstType.CallBinary,     "simd_shuffle_down");
+            Add(Instruction.ShuffleUp,                InstType.CallBinary,     "simd_shuffle_up");
+            Add(Instruction.ShuffleXor,               InstType.CallBinary,     "simd_shuffle_xor");
+            Add(Instruction.Sine,                     InstType.CallUnary,      "sin");
+            Add(Instruction.SquareRoot,               InstType.CallUnary,      "sqrt");
+            Add(Instruction.Store,                    InstType.Special);
+            Add(Instruction.Subtract,                 InstType.OpBinary,       "-",  2);
+            Add(Instruction.SwizzleAdd,               InstType.CallTernary,    HelperFunctionNames.SwizzleAdd);
+            Add(Instruction.TextureSample,            InstType.Special);
+            Add(Instruction.TextureQuerySamples,      InstType.Special);
+            Add(Instruction.TextureQuerySize,         InstType.Special);
+            Add(Instruction.Truncate,                 InstType.CallUnary,      "trunc");
+            Add(Instruction.UnpackDouble2x32,         0); // MSL does not have a 64-bit FP
+            Add(Instruction.UnpackHalf2x16,           InstType.Special);
+            Add(Instruction.VectorExtract,            InstType.Special);
+            Add(Instruction.VoteAll,                  InstType.CallUnary,      "simd_all");
+            Add(Instruction.VoteAllEqual,             InstType.Special);
+            Add(Instruction.VoteAny,                  InstType.CallUnary,      "simd_any");
+#pragma warning restore IDE0055
+        }
+
+        /// <summary>
+        /// Registers an instruction on the instruction table.
+        /// </summary>
+        /// <param name="inst">Instruction to register, used as the table index</param>
+        /// <param name="flags">Kind of the instruction</param>
+        /// <param name="opName">Function name or operator text, if any</param>
+        /// <param name="precedence">Operator precedence value</param>
+        private static void Add(Instruction inst, InstType flags, string opName = null, int precedence = 0)
+        {
+            InstInfo entry = new(flags, opName, precedence);
+
+            _infoTable[(int)inst] = entry;
+        }
+
+        /// <summary>
+        /// Gets the table entry of an instruction, ignoring the bits outside of the instruction mask.
+        /// </summary>
+        /// <param name="inst">Instruction to look up</param>
+        /// <returns>The table entry of the instruction</returns>
+        public static InstInfo GetInstructionInfo(Instruction inst)
+        {
+            int tableIndex = (int)(inst & Instruction.Mask);
+
+            return _infoTable[tableIndex];
+        }
+
+        /// <summary>
+        /// Gets the expression of a source node, reinterpreted from its own type to the requested one.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="node">Source node</param>
+        /// <param name="dstType">Type expected by the consumer of the expression</param>
+        /// <returns>The generated expression</returns>
+        public static string GetSourceExpr(CodeGenContext context, IAstNode node, AggregateType dstType)
+        {
+            AggregateType srcType = OperandManager.GetNodeDestType(context, node);
+
+            return ReinterpretCast(context, node, srcType, dstType);
+        }
+
+        /// <summary>
+        /// Encloses an expression in parentheses if its parent instruction requires it,
+        /// looking the parent up on the instruction table.
+        /// </summary>
+        /// <param name="expr">Expression to enclose</param>
+        /// <param name="node">Node that produced the expression</param>
+        /// <param name="pInst">Parent instruction</param>
+        /// <param name="isLhs">True if the expression is the left hand side of the parent</param>
+        /// <returns>The expression, with parentheses if needed</returns>
+        public static string Enclose(string expr, IAstNode node, Instruction pInst, bool isLhs)
+        {
+            return Enclose(expr, node, pInst, GetInstructionInfo(pInst), isLhs);
+        }
+
+        /// <summary>
+        /// Encloses an expression in parentheses if its parent instruction requires it.
+        /// </summary>
+        /// <param name="expr">Expression to enclose</param>
+        /// <param name="node">Node that produced the expression</param>
+        /// <param name="pInst">Parent instruction</param>
+        /// <param name="pInfo">Table entry of the parent instruction</param>
+        /// <param name="isLhs">True if the expression is the left hand side of the parent</param>
+        /// <returns>The expression, with parentheses if needed</returns>
+        public static string Enclose(string expr, IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs = false)
+        {
+            return NeedsParenthesis(node, pInst, pInfo, isLhs)
+                ? "(" + expr + ")"
+                : expr;
+        }
+
+        /// <summary>
+        /// Checks if the expression of a node must be enclosed in parentheses when used by a parent instruction.
+        /// </summary>
+        /// <param name="node">Node that produces the expression</param>
+        /// <param name="pInst">Parent instruction</param>
+        /// <param name="pInfo">Table entry of the parent instruction</param>
+        /// <param name="isLhs">True if the node is the left hand side of the parent</param>
+        /// <returns>True if parentheses are needed, false otherwise</returns>
+        public static bool NeedsParenthesis(IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs)
+        {
+            if (node is AstOperation operation)
+            {
+                const InstType CallOrSpecial = InstType.Call | InstType.Special;
+
+                // Arguments of calls and special instructions are already delimited.
+                if ((pInfo.Type & CallOrSpecial) != 0)
+                {
+                    return false;
+                }
+
+                InstInfo info = _infoTable[(int)(operation.Inst & Instruction.Mask)];
+
+                // The same goes for nodes that are themselves calls or special instructions.
+                if ((info.Type & CallOrSpecial) != 0)
+                {
+                    return false;
+                }
+
+                // No parentheses if the node has a lower precedence value than the parent,
+                // if it has the same one and is on the left hand side,
+                // or if both are the same commutative operation.
+                bool safeWithoutParenthesis =
+                    info.Precedence < pInfo.Precedence ||
+                    (info.Precedence == pInfo.Precedence && isLhs) ||
+                    (pInst == operation.Inst && info.Type == InstType.OpBinaryCom);
+
+                return !safeWithoutParenthesis;
+            }
+
+            // If the node isn't an operation, then it can only be an operand,
+            // and those never need to be surrounded in parentheses.
+            // The exception is a negative constant consumed by a unary operation,
+            // as in MSL, while a sequence like ~-1 is valid, --2 is not.
+            return IsNegativeConst(node) && pInfo.Type == InstType.OpUnary;
+        }
+
+        /// <summary>
+        /// Checks if a node is a constant operand with a negative value.
+        /// </summary>
+        /// <param name="node">Node to check</param>
+        /// <returns>True if the node is a negative constant, false otherwise</returns>
+        private static bool IsNegativeConst(IAstNode node)
+        {
+            return node is AstOperand operand &&
+                operand.Type == OperandType.Constant &&
+                operand.Value < 0;
+        }
+    }
+}

+ 672 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenMemory.cs

@@ -0,0 +1,672 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Text;
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenHelper;
+using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions
+{
+    static class InstGenMemory
+    {
+        /// <summary>
+        /// Builds the MSL expression for a load from, or a store to, a variable of the storage kind
+        /// carried by the operation.
+        /// </summary>
+        /// <remarks>
+        /// The leading sources of the operation select the variable (binding and field, memory id, or
+        /// I/O variable with optional location/component). The remaining inputs are emitted as element
+        /// accessors: a trailing constant index into a vector becomes a swizzle, everything else an
+        /// array subscript. For stores, the source that follows the inputs is the value being written.
+        /// </remarks>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Load, store or atomic operation</param>
+        /// <param name="isStore">True to append the assignment of the stored value</param>
+        /// <returns>The variable access expression, followed by " = value" when <paramref name="isStore"/> is set</returns>
+        /// <exception cref="InvalidOperationException">
+        /// A selector source is not a constant operand, or the storage kind is not handled here.
+        /// </exception>
+        public static string GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore)
+        {
+            StorageKind storageKind = operation.StorageKind;
+
+            string varName;
+            AggregateType varType;
+            int srcIndex = 0;
+            bool isStoreOrAtomic = operation.Inst == Instruction.Store || operation.Inst.IsAtomic();
+
+            // The last source of a store/atomic is the value, not an address component.
+            int inputsCount = isStoreOrAtomic ? operation.SourcesCount - 1 : operation.SourcesCount;
+            bool fieldHasPadding = false;
+
+            if (operation.Inst == Instruction.AtomicCompareAndSwap)
+            {
+                // Compare and swap carries one more value source than the other atomics.
+                inputsCount--;
+            }
+
+            switch (storageKind)
+            {
+                case StorageKind.ConstantBuffer:
+                case StorageKind.StorageBuffer:
+                    if (operation.GetSource(srcIndex++) is not AstOperand bindingIndex || bindingIndex.Type != OperandType.Constant)
+                    {
+                        throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
+                    }
+
+                    int binding = bindingIndex.Value;
+                    BufferDefinition buffer = storageKind == StorageKind.ConstantBuffer
+                        ? context.Properties.ConstantBuffers[binding]
+                        : context.Properties.StorageBuffers[binding];
+
+                    if (operation.GetSource(srcIndex++) is not AstOperand fieldIndex || fieldIndex.Type != OperandType.Constant)
+                    {
+                        throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand.");
+                    }
+
+                    StructureField field = buffer.Type.Fields[fieldIndex.Value];
+
+                    // Std140 arrays whose element is not a 4-component vector get an extra ".x"
+                    // accessor appended after the subscripts below.
+                    fieldHasPadding = buffer.Layout == BufferLayout.Std140
+                                      && ((field.Type & AggregateType.Vector4) == 0)
+                                      && ((field.Type & AggregateType.Array) != 0);
+
+                    varName = storageKind == StorageKind.ConstantBuffer
+                        ? "constant_buffers"
+                        : "storage_buffers";
+                    varName += "." + buffer.Name;
+                    varName += "->" + field.Name;
+                    varType = field.Type;
+                    break;
+
+                case StorageKind.LocalMemory:
+                case StorageKind.SharedMemory:
+                    if (operation.GetSource(srcIndex++) is not AstOperand { Type: OperandType.Constant } bindingId)
+                    {
+                        throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
+                    }
+
+                    MemoryDefinition memory = storageKind == StorageKind.LocalMemory
+                        ? context.Properties.LocalMemories[bindingId.Value]
+                        : context.Properties.SharedMemories[bindingId.Value];
+
+                    varName = memory.Name;
+                    varType = memory.Type;
+                    break;
+
+                case StorageKind.Input:
+                case StorageKind.InputPerPatch:
+                case StorageKind.Output:
+                case StorageKind.OutputPerPatch:
+                    if (operation.GetSource(srcIndex++) is not AstOperand varId || varId.Type != OperandType.Constant)
+                    {
+                        throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
+                    }
+
+                    IoVariable ioVariable = (IoVariable)varId.Value;
+                    bool isOutput = storageKind.IsOutput();
+                    bool isPerPatch = storageKind.IsPerPatch();
+                    int location = -1;
+                    int component = 0;
+
+                    if (context.Definitions.HasPerLocationInputOrOutput(ioVariable, isOutput))
+                    {
+                        if (operation.GetSource(srcIndex++) is not AstOperand vecIndex || vecIndex.Type != OperandType.Constant)
+                        {
+                            throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand.");
+                        }
+
+                        location = vecIndex.Value;
+
+                        // The component source is only consumed here when the definitions declare
+                        // that exact location/component pair; otherwise it is left for the accessor loop.
+                        if (operation.SourcesCount > srcIndex &&
+                            operation.GetSource(srcIndex) is AstOperand elemIndex &&
+                            elemIndex.Type == OperandType.Constant &&
+                            context.Definitions.HasPerLocationInputOrOutputComponent(ioVariable, vecIndex.Value, elemIndex.Value, isOutput))
+                        {
+                            component = elemIndex.Value;
+                            srcIndex++;
+                        }
+                    }
+
+                    (varName, varType) = IoMap.GetMslBuiltIn(
+                        context.Definitions,
+                        ioVariable,
+                        location,
+                        component,
+                        isOutput,
+                        isPerPatch);
+                    break;
+
+                default:
+                    throw new InvalidOperationException($"Invalid storage kind {storageKind}.");
+            }
+
+            for (; srcIndex < inputsCount; srcIndex++)
+            {
+                IAstNode src = operation.GetSource(srcIndex);
+
+                if ((varType & AggregateType.ElementCountMask) != 0 &&
+                    srcIndex == inputsCount - 1 &&
+                    src is AstOperand elementIndex &&
+                    elementIndex.Type == OperandType.Constant)
+                {
+                    // Constant index into a vector: emit a swizzle rather than a subscript.
+                    varName += "." + "xyzw"[elementIndex.Value & 3];
+                }
+                else
+                {
+                    varName += $"[{GetSourceExpr(context, src, AggregateType.S32)}]";
+                }
+            }
+
+            varName += fieldHasPadding ? ".x" : "";
+
+            if (isStore)
+            {
+                // The stored value is formatted using the element type of the destination only.
+                varType &= AggregateType.ElementTypeMask;
+                varName = $"{varName} = {GetSourceExpr(context, operation.GetSource(srcIndex), varType)}";
+            }
+
+            return varName;
+        }
+
+        /// <summary>
+        /// Builds the MSL member call for an image load ("read"), image store ("write") or image
+        /// atomic operation.
+        /// </summary>
+        /// <remarks>
+        /// Sources are consumed in order: optional image array index, coordinates, optional array
+        /// layer, then (store) up to four colour components or (atomic) the compare value for CAS
+        /// followed by the operand value.
+        /// </remarks>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Texture operation to generate; must be an <see cref="AstTextureOperation"/></param>
+        /// <returns>The generated MSL expression</returns>
+        public static string ImageLoadOrStore(CodeGenContext context, AstOperation operation)
+        {
+            AstTextureOperation texOp = (AstTextureOperation)operation;
+
+            bool isArray = (texOp.Type & SamplerType.Array) != 0;
+
+            var texCallBuilder = new StringBuilder();
+
+            int srcIndex = 0;
+
+            string Src(AggregateType type)
+            {
+                return GetSourceExpr(context, texOp.GetSource(srcIndex++), type);
+            }
+
+            string imageName = GetImageName(context, texOp, ref srcIndex);
+            texCallBuilder.Append(imageName);
+            texCallBuilder.Append('.');
+
+            if (texOp.Inst == Instruction.ImageAtomic)
+            {
+                // The result of the call is indexed with "[0]" below, so every entry has to be a
+                // value-returning texture atomic. That is why min/max use the "fetch" variants.
+                texCallBuilder.Append((texOp.Flags & TextureFlags.AtomicMask) switch
+                {
+                    TextureFlags.Add => "atomic_fetch_add",
+                    TextureFlags.Minimum => "atomic_fetch_min",
+                    TextureFlags.Maximum => "atomic_fetch_max",
+                    TextureFlags.Increment => "atomic_fetch_add",
+                    TextureFlags.Decrement => "atomic_fetch_sub",
+                    TextureFlags.BitwiseAnd => "atomic_fetch_and",
+                    TextureFlags.BitwiseOr => "atomic_fetch_or",
+                    TextureFlags.BitwiseXor => "atomic_fetch_xor",
+                    TextureFlags.Swap => "atomic_exchange",
+                    TextureFlags.CAS => "atomic_compare_exchange_weak",
+                    _ => "atomic_fetch_add",
+                });
+            }
+            else
+            {
+                texCallBuilder.Append(texOp.Inst == Instruction.ImageLoad ? "read" : "write");
+            }
+
+            texCallBuilder.Append('(');
+
+            // Coordinates are built separately because a store emits the colour before them.
+            var coordsBuilder = new StringBuilder();
+
+            int coordsCount = texOp.Type.GetDimensions();
+
+            if (coordsCount > 1)
+            {
+                string[] elems = new string[coordsCount];
+
+                for (int index = 0; index < coordsCount; index++)
+                {
+                    elems[index] = Src(AggregateType.S32);
+                }
+
+                coordsBuilder.Append($"uint{coordsCount}({string.Join(", ", elems)})");
+            }
+            else
+            {
+                coordsBuilder.Append($"uint({Src(AggregateType.S32)})");
+            }
+
+            if (isArray)
+            {
+                coordsBuilder.Append(", ");
+                coordsBuilder.Append(Src(AggregateType.S32));
+            }
+
+            if (texOp.Inst == Instruction.ImageStore)
+            {
+                AggregateType type = texOp.Format.GetComponentType();
+
+                string[] cElems = new string[4];
+
+                for (int index = 0; index < 4; index++)
+                {
+                    if (srcIndex < texOp.SourcesCount)
+                    {
+                        cElems[index] = Src(type);
+                    }
+                    else
+                    {
+                        // Components without a source are filled with a zero of the component type.
+                        cElems[index] = type switch
+                        {
+                            AggregateType.S32 => NumberFormatter.FormatInt(0),
+                            AggregateType.U32 => NumberFormatter.FormatUint(0),
+                            _ => NumberFormatter.FormatFloat(0),
+                        };
+                    }
+                }
+
+                string prefix = type switch
+                {
+                    AggregateType.S32 => "int",
+                    AggregateType.U32 => "uint",
+                    AggregateType.FP32 => "float",
+                    _ => string.Empty,
+                };
+
+                texCallBuilder.Append($"{prefix}4({string.Join(", ", cElems)})");
+                texCallBuilder.Append(", ");
+            }
+
+            texCallBuilder.Append(coordsBuilder);
+
+            if (texOp.Inst == Instruction.ImageAtomic)
+            {
+                texCallBuilder.Append(", ");
+
+                AggregateType type = texOp.Format.GetComponentType();
+
+                if ((texOp.Flags & TextureFlags.AtomicMask) == TextureFlags.CAS)
+                {
+                    // Compare value; it needs its own separator before the desired value.
+                    // NOTE(review): the MSL texture compare-exchange takes a pointer to the expected
+                    // value and returns bool, so this call shape likely still needs work - confirm.
+                    texCallBuilder.Append(Src(type));
+                    texCallBuilder.Append(", ");
+                }
+
+                // Increment adds one and decrement subtracts one (the call emitted above is
+                // atomic_fetch_add / atomic_fetch_sub respectively), so both use a positive operand.
+                string value = (texOp.Flags & TextureFlags.AtomicMask) switch
+                {
+                    TextureFlags.Increment => NumberFormatter.FormatInt(1, type), // TODO: Clamp value
+                    TextureFlags.Decrement => NumberFormatter.FormatInt(1, type), // TODO: Clamp value
+                    _ => Src(type),
+                };
+
+                texCallBuilder.Append(value);
+                // This doesn't match what the MSL spec document says so either
+                // it is wrong or the MSL compiler has a bug.
+                texCallBuilder.Append(")[0]");
+            }
+            else
+            {
+                texCallBuilder.Append(')');
+
+                if (texOp.Inst == Instruction.ImageLoad)
+                {
+                    texCallBuilder.Append(GetMaskMultiDest(texOp.Index));
+                }
+            }
+
+            return texCallBuilder.ToString();
+        }
+
+        /// <summary>
+        /// Generates the expression for a load operation.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Load operation</param>
+        /// <returns>The variable access expression</returns>
+        public static string Load(CodeGenContext context, AstOperation operation)
+            => GenerateLoadOrStore(context, operation, isStore: false);
+
+        /// <summary>
+        /// Generates the level-of-detail query for a texture: a float2 holding the clamped and the
+        /// unclamped LOD, from which the component selected by the operation index is taken.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Texture operation; must be an <see cref="AstTextureOperation"/></param>
+        /// <returns>The generated MSL expression</returns>
+        public static string Lod(CodeGenContext context, AstOperation operation)
+        {
+            AstTextureOperation texOp = (AstTextureOperation)operation;
+
+            int dimensions = texOp.Type.GetDimensions();
+            int firstCoord = 0;
+
+            // Both helpers may consume leading index sources, moving firstCoord forward.
+            string texture = GetTextureName(context, texOp, ref firstCoord);
+            string sampler = GetSamplerName(context, texOp, ref firstCoord);
+
+            string coords;
+
+            if (dimensions <= 1)
+            {
+                coords = GetSourceExpr(context, texOp.GetSource(firstCoord), AggregateType.FP32);
+            }
+            else
+            {
+                var components = new string[dimensions];
+
+                for (int i = 0; i < components.Length; i++)
+                {
+                    components[i] = GetSourceExpr(context, texOp.GetSource(firstCoord + i), AggregateType.FP32);
+                }
+
+                coords = $"float{dimensions}({string.Join(", ", components)})";
+            }
+
+            string arguments = $"({sampler}, {coords})";
+            string clampedLod = $"{texture}.calculate_clamped_lod{arguments}";
+            string unclampedLod = $"{texture}.calculate_unclamped_lod{arguments}";
+
+            return $"float2({clampedLod}, {unclampedLod}){GetMask(texOp.Index)}";
+        }
+
+        /// <summary>
+        /// Generates the assignment statement for a store operation.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Store operation</param>
+        /// <returns>The variable access expression followed by the assigned value</returns>
+        public static string Store(CodeGenContext context, AstOperation operation)
+            => GenerateLoadOrStore(context, operation, isStore: true);
+
+        /// <summary>
+        /// Builds the MSL member call for a texture sample, gather or integer-coordinate read.
+        /// </summary>
+        /// <remarks>
+        /// Operands are pulled from the operation sources strictly in order through the local
+        /// <c>Src</c> function, so the sequence of the statements below defines which source is
+        /// used for which argument.
+        /// </remarks>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Texture operation; must be an <see cref="AstTextureOperation"/></param>
+        /// <returns>The generated MSL expression</returns>
+        public static string TextureSample(CodeGenContext context, AstOperation operation)
+        {
+            AstTextureOperation texOp = (AstTextureOperation)operation;
+
+            bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
+            bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
+            bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
+            bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
+            bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
+            bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
+            bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
+
+            bool isArray = (texOp.Type & SamplerType.Array) != 0;
+            bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;
+
+            var texCallBuilder = new StringBuilder();
+
+            // Only vector results get a component swizzle appended at the end.
+            bool colorIsVector = isGather || !isShadow;
+
+            int srcIndex = 0;
+
+            // Formats the next unread source and advances the cursor.
+            string Src(AggregateType type)
+            {
+                return GetSourceExpr(context, texOp.GetSource(srcIndex++), type);
+            }
+
+            string textureName = GetTextureName(context, texOp, ref srcIndex);
+            string samplerName = GetSamplerName(context, texOp, ref srcIndex);
+
+            texCallBuilder.Append(textureName);
+            texCallBuilder.Append('.');
+
+            if (intCoords)
+            {
+                // Integer coordinates use "read", which takes no sampler argument.
+                texCallBuilder.Append("read(");
+            }
+            else
+            {
+                if (isGather)
+                {
+                    texCallBuilder.Append("gather");
+                }
+                else
+                {
+                    texCallBuilder.Append("sample");
+                }
+
+                if (isShadow)
+                {
+                    texCallBuilder.Append("_compare");
+                }
+
+                texCallBuilder.Append($"({samplerName}, ");
+            }
+
+            int coordsCount = texOp.Type.GetDimensions();
+
+            int pCount = coordsCount;
+
+            // Appends an argument, inserting ", " before every argument except the first one.
+            bool appended = false;
+            void Append(string str)
+            {
+                if (appended)
+                {
+                    texCallBuilder.Append(", ");
+                }
+                else
+                {
+                    appended = true;
+                }
+
+                texCallBuilder.Append(str);
+            }
+
+            AggregateType coordType = intCoords ? AggregateType.S32 : AggregateType.FP32;
+
+            // Consumes "count" coordinate sources and wraps them in a uint/float (vector) constructor.
+            string AssemblePVector(int count)
+            {
+                string coords;
+                if (count > 1)
+                {
+                    string[] elems = new string[count];
+
+                    for (int index = 0; index < count; index++)
+                    {
+                        elems[index] = Src(coordType);
+                    }
+
+                    coords = string.Join(", ", elems);
+                }
+                else
+                {
+                    coords = Src(coordType);
+                }
+
+                string prefix = intCoords ? "uint" : "float";
+
+                return prefix + (count > 1 ? count : "") + "(" + coords + ")";
+            }
+
+            Append(AssemblePVector(pCount));
+
+            if (isArray)
+            {
+                Append(Src(AggregateType.S32));
+            }
+
+            if (isShadow)
+            {
+                Append(Src(AggregateType.FP32));
+            }
+
+            // Derivatives and LOD bias are not emitted; only a warning is logged.
+            // NOTE(review): their sources are not consumed either, so if they precede the LOD level
+            // or offset operands in the source list, those would be read from the wrong index - confirm.
+            if (hasDerivatives)
+            {
+                Logger.Warning?.PrintMsg(LogClass.Gpu, "Unused sampler derivatives!");
+            }
+
+            if (hasLodBias)
+            {
+                Logger.Warning?.PrintMsg(LogClass.Gpu, "Unused sample LOD bias!");
+            }
+
+            if (hasLodLevel)
+            {
+                if (intCoords)
+                {
+                    Append(Src(coordType));
+                }
+                else
+                {
+                    Append($"level({Src(coordType)})");
+                }
+            }
+
+            // Consumes "count" offset sources and wraps them in an int vector constructor when count > 1.
+            string AssembleOffsetVector(int count)
+            {
+                if (count > 1)
+                {
+                    string[] elems = new string[count];
+
+                    for (int index = 0; index < count; index++)
+                    {
+                        elems[index] = Src(AggregateType.S32);
+                    }
+
+                    return "int" + count + "(" + string.Join(", ", elems) + ")";
+                }
+                else
+                {
+                    return Src(AggregateType.S32);
+                }
+            }
+
+            // TODO: Support reads with offsets
+            if (!intCoords)
+            {
+                if (hasOffset)
+                {
+                    Append(AssembleOffsetVector(coordsCount));
+                }
+                else if (hasOffsets)
+                {
+                    Logger.Warning?.PrintMsg(LogClass.Gpu, "Multiple offsets on gathers are not yet supported!");
+                }
+            }
+
+            texCallBuilder.Append(')');
+            texCallBuilder.Append(colorIsVector ? GetMaskMultiDest(texOp.Index) : "");
+
+            return texCallBuilder.ToString();
+        }
+
+        /// <summary>
+        /// Gets the MSL expression naming the texture used by a texture operation.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="texOp">Texture operation</param>
+        /// <param name="srcIndex">Source cursor; advanced by one when the texture is an array and its index source is consumed</param>
+        /// <returns>The texture name, prefixed with its set name and "tex_"</returns>
+        private static string GetTextureName(CodeGenContext context, AstTextureOperation texOp, ref int srcIndex)
+        {
+            TextureDefinition definition = context.Properties.Textures[texOp.GetTextureSetAndBinding()];
+            string setName = Declarations.GetNameForSet(definition.Set, true);
+
+            // Non-array textures are referenced by their bare name.
+            if (definition.ArrayLength == 1)
+            {
+                return $"{setName}.tex_{definition.Name}";
+            }
+
+            string arrayIndex = GetSourceExpr(context, texOp.GetSource(srcIndex++), AggregateType.S32);
+
+            return $"{setName}.tex_{definition.Name}[{arrayIndex}]";
+        }
+
+        /// <summary>
+        /// Gets the MSL expression naming the sampler used by a texture operation.
+        /// </summary>
+        /// <remarks>
+        /// Must be called right after <see cref="GetTextureName"/> with the same cursor. For a
+        /// combined (non-separate) sampler the definition is the same one the texture uses, so its
+        /// array index is the source that <see cref="GetTextureName"/> has just consumed, found at
+        /// <c>srcIndex - 1</c>. A separate sampler consumes its own index source, but only when it is
+        /// an array, mirroring <see cref="GetTextureName"/> and <see cref="GetImageName"/>.
+        /// </remarks>
+        /// <param name="context">Code generation context</param>
+        /// <param name="texOp">Texture operation</param>
+        /// <param name="srcIndex">Source cursor; advanced by one only when a separate array sampler consumes its index source</param>
+        /// <returns>The sampler name, prefixed with its set name and "samp_"</returns>
+        private static string GetSamplerName(CodeGenContext context, AstTextureOperation texOp, ref int srcIndex)
+        {
+            bool isSeparate = texOp.IsSeparate;
+            var index = isSeparate ? texOp.GetSamplerSetAndBinding() : texOp.GetTextureSetAndBinding();
+
+            TextureDefinition samplerDefinition = context.Properties.Textures[index];
+            string name = samplerDefinition.Name;
+            string setName = Declarations.GetNameForSet(samplerDefinition.Set, true);
+
+            if (samplerDefinition.ArrayLength != 1)
+            {
+                // Separate sampler: its index is the next unread source.
+                // Combined sampler: reuse the index already read for the texture.
+                int sourceIndex = isSeparate ? srcIndex++ : srcIndex - 1;
+
+                name = $"{name}[{GetSourceExpr(context, texOp.GetSource(sourceIndex), AggregateType.S32)}]";
+            }
+
+            return $"{setName}.samp_{name}";
+        }
+
+        /// <summary>
+        /// Gets the MSL expression naming the image used by an image operation.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="texOp">Image operation</param>
+        /// <param name="srcIndex">Source cursor; advanced by one when the image is an array and its index source is consumed</param>
+        /// <returns>The image name prefixed with its set name</returns>
+        private static string GetImageName(CodeGenContext context, AstTextureOperation texOp, ref int srcIndex)
+        {
+            TextureDefinition definition = context.Properties.Images[texOp.GetTextureSetAndBinding()];
+            string setName = Declarations.GetNameForSet(definition.Set, true);
+
+            // Non-array images are referenced by their bare name.
+            if (definition.ArrayLength == 1)
+            {
+                return $"{setName}.{definition.Name}";
+            }
+
+            string arrayIndex = GetSourceExpr(context, texOp.GetSource(srcIndex++), AggregateType.S32);
+
+            return $"{setName}.{definition.Name}[{arrayIndex}]";
+        }
+
+        /// <summary>
+        /// Converts a component bit mask into a swizzle suffix (bit 0 = x, bit 1 = y, bit 2 = z, bit 3 = w).
+        /// </summary>
+        /// <param name="mask">Component mask; only the four lowest bits select components</param>
+        /// <returns>An empty string for a zero mask, otherwise "." followed by the selected components</returns>
+        private static string GetMaskMultiDest(int mask)
+        {
+            if (mask == 0x0)
+            {
+                return "";
+            }
+
+            const string Components = "xyzw";
+
+            var swizzle = new StringBuilder(".", Components.Length + 1);
+
+            for (int bit = 0; bit < Components.Length; bit++)
+            {
+                bool isSelected = ((mask >> bit) & 1) != 0;
+
+                if (isSelected)
+                {
+                    swizzle.Append(Components[bit]);
+                }
+            }
+
+            return swizzle.ToString();
+        }
+
+        /// <summary>
+        /// Generates the query for the number of samples of a texture.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Texture operation; must be an <see cref="AstTextureOperation"/></param>
+        /// <returns>A call to get_num_samples() on the texture</returns>
+        public static string TextureQuerySamples(CodeGenContext context, AstOperation operation)
+        {
+            int srcIndex = 0;
+            string texture = GetTextureName(context, (AstTextureOperation)operation, ref srcIndex);
+
+            return texture + ".get_num_samples()";
+        }
+
+        /// <summary>
+        /// Generates a texture size query. The operation index selects what is queried:
+        /// 0 = width, 1 = height, 2 = depth (or array size for array textures), 3 = mip level count.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Texture operation; must be an <see cref="AstTextureOperation"/></param>
+        /// <returns>The generated MSL member call</returns>
+        public static string TextureQuerySize(CodeGenContext context, AstOperation operation)
+        {
+            AstTextureOperation texOp = (AstTextureOperation)operation;
+
+            var texCallBuilder = new StringBuilder();
+
+            int srcIndex = 0;
+
+            // May consume source 0 as the texture array index, moving srcIndex to the LOD source.
+            string textureName = GetTextureName(context, texOp, ref srcIndex);
+            texCallBuilder.Append(textureName);
+            texCallBuilder.Append('.');
+
+            if (texOp.Index == 3)
+            {
+                texCallBuilder.Append("get_num_mip_levels()");
+            }
+            else
+            {
+                // Same lookup GetTextureName has just performed, so the key is known to exist.
+                TextureDefinition definition = context.Properties.Textures[texOp.GetTextureSetAndBinding()];
+                bool hasLod = !definition.Type.HasFlag(SamplerType.Multisample) && (definition.Type & SamplerType.Mask) != SamplerType.TextureBuffer;
+                bool isArray = definition.Type.HasFlag(SamplerType.Array);
+                texCallBuilder.Append("get_");
+
+                if (texOp.Index == 0)
+                {
+                    texCallBuilder.Append("width");
+                }
+                else if (texOp.Index == 1)
+                {
+                    texCallBuilder.Append("height");
+                }
+                else
+                {
+                    if (isArray)
+                    {
+                        texCallBuilder.Append("array_size");
+                    }
+                    else
+                    {
+                        texCallBuilder.Append("depth");
+                    }
+                }
+
+                texCallBuilder.Append('(');
+
+                if (hasLod && !isArray)
+                {
+                    // Read the LOD at the cursor rather than at a fixed position: source 0 holds
+                    // the texture array index whenever GetTextureName consumed one.
+                    IAstNode lod = operation.GetSource(srcIndex);
+                    string lodExpr = GetSourceExpr(context, lod, GetSrcVarType(operation.Inst, 0));
+
+                    texCallBuilder.Append(lodExpr);
+                }
+
+                texCallBuilder.Append(')');
+            }
+
+            return texCallBuilder.ToString();
+        }
+
+        /// <summary>
+        /// Generates the expression that packs two values into one 32-bit unsigned integer, by building
+        /// a half2 from both sources and reinterpreting its bits as uint.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Pack operation with two sources</param>
+        /// <returns>The generated MSL expression</returns>
+        public static string PackHalf2x16(CodeGenContext context, AstOperation operation)
+        {
+            string low = GetSourceExpr(context, operation.GetSource(0), GetSrcVarType(operation.Inst, 0));
+            string high = GetSourceExpr(context, operation.GetSource(1), GetSrcVarType(operation.Inst, 1));
+
+            return "as_type<uint>(half2(" + low + ", " + high + "))";
+        }
+
+        /// <summary>
+        /// Generates the expression that reinterprets a 32-bit value as half2, converts it to float2
+        /// and selects the component given by the operation index.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Unpack operation with one source</param>
+        /// <returns>The generated MSL expression</returns>
+        public static string UnpackHalf2x16(CodeGenContext context, AstOperation operation)
+        {
+            string packed = GetSourceExpr(context, operation.GetSource(0), GetSrcVarType(operation.Inst, 0));
+            string component = GetMask(operation.Index);
+
+            return $"float2(as_type<half2>({packed})){component}";
+        }
+
+        /// <summary>
+        /// Gets the swizzle suffix selecting one component of a two component vector.
+        /// </summary>
+        /// <param name="index">Component index, 0 for x or 1 for y</param>
+        /// <returns>".x" or ".y"</returns>
+        private static string GetMask(int index)
+        {
+            return "." + "xy".Substring(index, 1);
+        }
+    }
+}

+ 32 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstGenVector.cs

@@ -0,0 +1,32 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions.InstGenHelper;
+using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions
+{
+    /// <summary>
+    /// MSL code generation for vector instructions.
+    /// </summary>
+    static class InstGenVector
+    {
+        /// <summary>
+        /// Generates the expression that extracts one element from a vector.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Operation whose first source is the vector and second source the element index</param>
+        /// <returns>A swizzle access for a constant index, a subscript access otherwise</returns>
+        public static string VectorExtract(CodeGenContext context, AstOperation operation)
+        {
+            IAstNode source = operation.GetSource(0);
+            IAstNode selector = operation.GetSource(1);
+
+            string sourceExpr = GetSourceExpr(context, source, OperandManager.GetNodeDestType(context, source));
+
+            // A constant selector maps directly to a component name.
+            if (selector is AstOperand { Type: OperandType.Constant } constantSelector)
+            {
+                char component = "xyzw"[constantSelector.Value];
+
+                return $"{sourceExpr}.{component}";
+            }
+
+            string selectorExpr = GetSourceExpr(context, selector, GetSrcVarType(operation.Inst, 1));
+
+            return $"{sourceExpr}[{selectorExpr}]";
+        }
+    }
+}

+ 18 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstInfo.cs

@@ -0,0 +1,18 @@
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions
+{
+    /// <summary>
+    /// Immutable entry describing how an instruction is emitted: its kind, the operator or
+    /// function name, and its precedence value.
+    /// </summary>
+    readonly struct InstInfo
+    {
+        /// <summary>
+        /// Creates a new instruction information entry.
+        /// </summary>
+        /// <param name="type">Instruction type flags</param>
+        /// <param name="opName">Operator or function name</param>
+        /// <param name="precedence">Precedence value of the instruction</param>
+        public InstInfo(InstType type, string opName, int precedence)
+        {
+            (Type, OpName, Precedence) = (type, opName, precedence);
+        }
+
+        /// <summary>Instruction type flags.</summary>
+        public InstType Type { get; }
+
+        /// <summary>Operator or function name.</summary>
+        public string OpName { get; }
+
+        /// <summary>Precedence value of the instruction.</summary>
+        public int Precedence { get; }
+    }
+}

+ 35 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/InstType.cs

@@ -0,0 +1,35 @@
+using System;
+using System.Diagnostics.CodeAnalysis;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions
+{
+    // Classification of an instruction for code generation. Each named value combines
+    // category flags (bits 8 and up) with a small number stored in the low byte, which is
+    // extracted with ArityMask (presumably the operand count, going by the member names).
+    [Flags]
+    [SuppressMessage("Design", "CA1069: Enums values should not be duplicated")]
+    public enum InstType
+    {
+        // Operators.
+        OpNullary = Op | 0,
+        OpUnary = Op | 1,
+        OpBinary = Op | 2,
+        OpBinaryCom = Op | 2 | Commutative,
+        OpTernary = Op | 3,
+
+        // Function-call style instructions.
+        CallNullary = Call | 0,
+        CallUnary = Call | 1,
+        CallBinary = Call | 2,
+        CallTernary = Call | 3,
+        CallQuaternary = Call | 4,
+
+        // The atomic instructions have one extra operand,
+        // for the storage slot and offset pair.
+        AtomicBinary = Call | Atomic | 3,
+        AtomicTernary = Call | Atomic | 4,
+
+        // Category flags.
+        Commutative = 1 << 8,
+        Op = 1 << 9,
+        Call = 1 << 10,
+        Atomic = 1 << 11,
+        Special = 1 << 12,
+
+        // Mask selecting the low byte of a value.
+        ArityMask = 0xff,
+    }
+}

+ 83 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/Instructions/IoMap.cs

@@ -0,0 +1,83 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Globalization;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions
+{
+    /// <summary>
+    /// Maps shader I/O variables to the MSL expression and type used to access them.
+    /// </summary>
+    static class IoMap
+    {
+        /// <summary>
+        /// Gets the MSL expression and type for an I/O variable.
+        /// </summary>
+        /// <remarks>
+        /// Callers append element accessors (".x", "[i]") directly to the returned expression, so
+        /// every entry that is not a single primary expression is wrapped in parentheses. Without
+        /// them the accessor would bind to the last operand of the ternary / bitwise AND only.
+        /// </remarks>
+        /// <param name="definitions">Shader definitions</param>
+        /// <param name="ioVariable">I/O variable to map</param>
+        /// <param name="location">Location of the variable; used for fragment output colors and user defined variables</param>
+        /// <param name="component">Component of the variable; used for user defined variables</param>
+        /// <param name="isOutput">True if the variable is an output, false for an input</param>
+        /// <param name="isPerPatch">True if the variable is per-patch</param>
+        /// <returns>
+        /// The expression and its type, or a null expression with <see cref="AggregateType.Invalid"/>
+        /// (after logging a warning) when the variable has no mapping
+        /// </returns>
+        public static (string, AggregateType) GetMslBuiltIn(
+            ShaderDefinitions definitions,
+            IoVariable ioVariable,
+            int location,
+            int component,
+            bool isOutput,
+            bool isPerPatch)
+        {
+            var returnValue = ioVariable switch
+            {
+                IoVariable.BaseInstance => ("base_instance", AggregateType.U32),
+                IoVariable.BaseVertex => ("base_vertex", AggregateType.U32),
+                IoVariable.CtaId => ("threadgroup_position_in_grid", AggregateType.Vector3 | AggregateType.U32),
+                IoVariable.ClipDistance => ("out.clip_distance", AggregateType.Array | AggregateType.FP32),
+                IoVariable.FragmentOutputColor => ($"out.color{location}", definitions.GetFragmentOutputColorType(location)),
+                IoVariable.FragmentOutputDepth => ("out.depth", AggregateType.FP32),
+                IoVariable.FrontFacing => ("in.front_facing", AggregateType.Bool),
+                IoVariable.GlobalId => ("thread_position_in_grid", AggregateType.Vector3 | AggregateType.U32),
+                IoVariable.InstanceId => ("instance_id", AggregateType.U32),
+                IoVariable.InstanceIndex => ("instance_index", AggregateType.U32),
+                IoVariable.InvocationId => ("INVOCATION_ID", AggregateType.S32),
+                IoVariable.PointCoord => ("in.point_coord", AggregateType.Vector2 | AggregateType.FP32),
+                IoVariable.PointSize => ("out.point_size", AggregateType.FP32),
+                IoVariable.Position => ("out.position", AggregateType.Vector4 | AggregateType.FP32),
+                IoVariable.PrimitiveId => ("in.primitive_id", AggregateType.U32),
+                IoVariable.SubgroupEqMask => ("(thread_index_in_simdgroup >= 32 ? uint4(0, (1 << (thread_index_in_simdgroup - 32)), uint2(0)) : uint4(1 << thread_index_in_simdgroup, uint3(0)))", AggregateType.Vector4 | AggregateType.U32),
+                IoVariable.SubgroupGeMask => ("(uint4(insert_bits(0u, 0xFFFFFFFF, thread_index_in_simdgroup, 32 - thread_index_in_simdgroup), uint3(0)) & (uint4((uint)((simd_vote::vote_t)simd_ballot(true) & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)simd_ballot(true) >> 32) & 0xFFFFFFFF), 0, 0)))", AggregateType.Vector4 | AggregateType.U32),
+                IoVariable.SubgroupGtMask => ("(uint4(insert_bits(0u, 0xFFFFFFFF, thread_index_in_simdgroup + 1, 32 - thread_index_in_simdgroup - 1), uint3(0)) & (uint4((uint)((simd_vote::vote_t)simd_ballot(true) & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)simd_ballot(true) >> 32) & 0xFFFFFFFF), 0, 0)))", AggregateType.Vector4 | AggregateType.U32),
+                IoVariable.SubgroupLaneId => ("thread_index_in_simdgroup", AggregateType.U32),
+                IoVariable.SubgroupLeMask => ("uint4(extract_bits(0xFFFFFFFF, 0, min(thread_index_in_simdgroup + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)thread_index_in_simdgroup + 1 - 32, 0)), uint2(0))", AggregateType.Vector4 | AggregateType.U32),
+                IoVariable.SubgroupLtMask => ("uint4(extract_bits(0xFFFFFFFF, 0, min(thread_index_in_simdgroup, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)thread_index_in_simdgroup - 32, 0)), uint2(0))", AggregateType.Vector4 | AggregateType.U32),
+                IoVariable.ThreadKill => ("simd_is_helper_thread()", AggregateType.Bool),
+                IoVariable.UserDefined => GetUserDefinedVariableName(definitions, location, component, isOutput, isPerPatch),
+                IoVariable.ThreadId => ("thread_position_in_threadgroup", AggregateType.Vector3 | AggregateType.U32),
+                IoVariable.VertexId => ("vertex_id", AggregateType.S32),
+                // gl_VertexIndex does not have a direct equivalent in MSL
+                IoVariable.VertexIndex => ("vertex_id", AggregateType.U32),
+                IoVariable.ViewportIndex => ("viewport_array_index", AggregateType.S32),
+                IoVariable.FragmentCoord => ("in.position", AggregateType.Vector4 | AggregateType.FP32),
+                _ => (null, AggregateType.Invalid),
+            };
+
+            if (returnValue.Item2 == AggregateType.Invalid)
+            {
+                Logger.Warning?.PrintMsg(LogClass.Gpu, $"Unable to find type for IoVariable {ioVariable}!");
+            }
+
+            return returnValue;
+        }
+
+        /// <summary>
+        /// Gets the expression and type for a user defined input or output variable.
+        /// </summary>
+        /// <param name="definitions">Shader definitions</param>
+        /// <param name="location">Location of the variable; a negative value returns the bare attribute prefix</param>
+        /// <param name="component">Component of the variable</param>
+        /// <param name="isOutput">True if the variable is an output, false for an input</param>
+        /// <param name="isPerPatch">True if the variable is per-patch</param>
+        /// <returns>The expression and type of the variable</returns>
+        private static (string, AggregateType) GetUserDefinedVariableName(ShaderDefinitions definitions, int location, int component, bool isOutput, bool isPerPatch)
+        {
+            string name = isPerPatch
+                ? Defaults.PerPatchAttributePrefix
+                : (isOutput ? Defaults.OAttributePrefix : Defaults.IAttributePrefix);
+
+            if (location < 0)
+            {
+                // No location: the prefix alone is returned, without the "in."/"out." qualifier.
+                return (name, definitions.GetUserDefinedType(0, isOutput));
+            }
+
+            name += location.ToString(CultureInfo.InvariantCulture);
+
+            // Variables declared per component get a "_x".."_w" suffix.
+            if (definitions.HasPerLocationInputOrOutputComponent(IoVariable.UserDefined, location, component, isOutput))
+            {
+                name += "_" + "xyzw"[component & 3];
+            }
+
+            string prefix = isOutput ? "out" : "in";
+
+            return (prefix + "." + name, definitions.GetUserDefinedType(location, isOutput));
+        }
+    }
+}

+ 286 - 0
src/Ryujinx.Graphics.Shader/CodeGen/Msl/MslGenerator.cs

@@ -0,0 +1,286 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Shader.CodeGen.Msl.Instructions;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Linq;
+using static Ryujinx.Graphics.Shader.CodeGen.Msl.TypeConversion;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Msl
+{
+    static class MslGenerator
+    {
+        /// <summary>
+        /// Generates MSL source code for a structured shader program.
+        /// </summary>
+        /// <param name="info">Structured program; function 0 is printed as the entry point, the remaining functions as helpers</param>
+        /// <param name="parameters">Code generation parameters, including the shader stage</param>
+        /// <returns>The generated code, or an empty string if the stage is not vertex, fragment or compute</returns>
+        public static string Generate(StructuredProgramInfo info, CodeGenParameters parameters)
+        {
+            ShaderStage stage = parameters.Definitions.Stage;
+
+            bool isSupportedStage = stage is ShaderStage.Vertex or ShaderStage.Fragment or ShaderStage.Compute;
+
+            if (!isSupportedStage)
+            {
+                Logger.Warning?.Print(LogClass.Gpu, $"Attempted to generate unsupported shader type {stage}!");
+                return "";
+            }
+
+            CodeGenContext context = new(info, parameters);
+
+            var sets = Declarations.Declare(context, info);
+
+            // Helper functions (every index except 0) are emitted first, each followed by a blank line,
+            // so that they appear in the output before the entry point.
+            int functionCount = info.Functions.Count;
+
+            for (int index = 1; index < functionCount; index++)
+            {
+                PrintFunction(context, info.Functions[index], stage, sets);
+
+                context.AppendLine();
+            }
+
+            // The entry point is always function 0 and is emitted last.
+            PrintFunction(context, info.Functions[0], stage, sets, true);
+
+            return context.GetCode();
+        }
+
+        /// <summary>
+        /// Emits one function: its signature, local declarations and body, wrapped in a scope.
+        /// </summary>
+        /// <param name="context">Code generation context that receives the output</param>
+        /// <param name="function">Function to emit</param>
+        /// <param name="stage">Shader stage being generated</param>
+        /// <param name="sets">Set indices forwarded to the signature builder</param>
+        /// <param name="isMainFunc">True if the function is the shader entry point</param>
+        private static void PrintFunction(CodeGenContext context, StructuredFunction function, ShaderStage stage, int[] sets, bool isMainFunc = false)
+        {
+            context.CurrentFunction = function;
+
+            string signature = GetFunctionSignature(context, function, stage, sets, isMainFunc);
+
+            context.AppendLine(signature);
+            context.EnterScope();
+
+            Declarations.DeclareLocals(context, function, stage, isMainFunc);
+
+            PrintBlock(context, function.MainBlock, isMainFunc);
+
+            // Non-compute entry points get a trailing "return out;" in case the body
+            // did not already return on every path.
+            bool needsFallbackReturn = isMainFunc && stage != ShaderStage.Compute;
+
+            if (needsFallbackReturn)
+            {
+                context.AppendLine("return out;");
+            }
+
+            context.LeaveScope();
+        }
+
+        /// <summary>
+        /// Builds the MSL signature (keyword, return type, name and argument list) of a function.
+        /// </summary>
+        /// <param name="context">Code generation context; its definitions supply the stage used to size the extra arguments</param>
+        /// <param name="function">Function whose signature is built</param>
+        /// <param name="stage">Shader stage being generated</param>
+        /// <param name="sets">Set indices; each one adds a buffer argument to the entry point</param>
+        /// <param name="isMainFunc">True if the function is the shader entry point</param>
+        /// <returns>The signature text, one argument per line, without the function body</returns>
+        private static string GetFunctionSignature(
+            CodeGenContext context,
+            StructuredFunction function,
+            ShaderStage stage,
+            int[] sets,
+            bool isMainFunc = false)
+        {
+            // Helper functions receive the buffers explicitly, and outside of compute also the stage input.
+            int additionalArgCount = 0;
+
+            if (!isMainFunc)
+            {
+                additionalArgCount = CodeGenContext.AdditionalArgCount;
+
+                if (context.Definitions.Stage != ShaderStage.Compute)
+                {
+                    additionalArgCount++;
+                }
+            }
+
+            // TODO: Replace this with a proper flag
+            bool needsThreadIndex = function.Name.Contains("Shuffle");
+
+            if (needsThreadIndex)
+            {
+                additionalArgCount++;
+            }
+
+            int inArgCount = function.InArguments.Length;
+            int outArgCount = function.OutArguments.Length;
+
+            string[] args = new string[additionalArgCount + inArgCount + outArgCount];
+
+            // All non-main functions need to be able to access the support_buffer as well
+            if (!isMainFunc)
+            {
+                int slot = 0;
+
+                if (stage != ShaderStage.Compute)
+                {
+                    args[slot++] = stage == ShaderStage.Vertex ? "VertexIn in" : "FragmentIn in";
+                }
+
+                args[slot++] = "constant ConstantBuffers &constant_buffers";
+                args[slot++] = "device StorageBuffers &storage_buffers";
+
+                if (needsThreadIndex)
+                {
+                    args[slot] = "uint thread_index_in_simdgroup";
+                }
+            }
+
+            // Declared arguments follow the extra ones: inputs by value, outputs by thread reference.
+            int nextArg = additionalArgCount;
+
+            for (int inIndex = 0; inIndex < inArgCount; inIndex++, nextArg++)
+            {
+                string typeName = Declarations.GetVarTypeName(function.InArguments[inIndex]);
+                string argName = OperandManager.GetArgumentName(inIndex);
+
+                args[nextArg] = $"{typeName} {argName}";
+            }
+
+            for (int outIndex = 0; outIndex < outArgCount; outIndex++, nextArg++)
+            {
+                string typeName = Declarations.GetVarTypeName(function.OutArguments[outIndex]);
+                string argName = OperandManager.GetArgumentName(inArgCount + outIndex);
+
+                args[nextArg] = $"thread {typeName} &{argName}";
+            }
+
+            string funcKeyword = "inline";
+            string funcName = null;
+            string returnType = Declarations.GetVarTypeName(function.ReturnType);
+
+            if (isMainFunc)
+            {
+                switch (stage)
+                {
+                    case ShaderStage.Vertex:
+                        funcKeyword = "vertex";
+                        funcName = "vertexMain";
+                        returnType = "VertexOut";
+
+                        // TODO: add these only if they are used
+                        args = args
+                            .Prepend("VertexIn in [[stage_in]]")
+                            .Concat(new[]
+                            {
+                                "uint vertex_id [[vertex_id]]",
+                                "uint instance_id [[instance_id]]",
+                                "uint base_instance [[base_instance]]",
+                                "uint base_vertex [[base_vertex]]",
+                            })
+                            .ToArray();
+                        break;
+
+                    case ShaderStage.Fragment:
+                        funcKeyword = "fragment";
+                        funcName = "fragmentMain";
+                        returnType = "FragmentOut";
+
+                        args = args.Prepend("FragmentIn in [[stage_in]]").ToArray();
+                        break;
+
+                    case ShaderStage.Compute:
+                        funcKeyword = "kernel";
+                        funcName = "kernelMain";
+                        returnType = "void";
+
+                        // TODO: add these only if they are used
+                        args = args
+                            .Concat(new[]
+                            {
+                                "uint3 threadgroup_position_in_grid [[threadgroup_position_in_grid]]",
+                                "uint3 thread_position_in_grid [[thread_position_in_grid]]",
+                                "uint3 thread_position_in_threadgroup [[thread_position_in_threadgroup]]",
+                                "uint thread_index_in_simdgroup [[thread_index_in_simdgroup]]",
+                            })
+                            .ToArray();
+                        break;
+                }
+
+                // One buffer argument per set, bound at the set index offset by the base set index.
+                string[] setArgs = new string[sets.Length];
+
+                for (int setIndex = 0; setIndex < sets.Length; setIndex++)
+                {
+                    int set = sets[setIndex];
+                    var bindingIndex = set + Defaults.BaseSetIndex;
+
+                    setArgs[setIndex] = $"constant {Declarations.GetNameForSet(set)} &{Declarations.GetNameForSet(set, true)} [[buffer({bindingIndex})]]";
+                }
+
+                args = args
+                    .Concat(new[]
+                    {
+                        $"constant ConstantBuffers &constant_buffers [[buffer({Defaults.ConstantBuffersIndex})]]",
+                        $"device StorageBuffers &storage_buffers [[buffer({Defaults.StorageBuffersIndex})]]",
+                    })
+                    .Concat(setArgs)
+                    .ToArray();
+            }
+
+            string funcPrefix = $"{funcKeyword} {returnType} {funcName ?? function.Name}(";
+
+            // Continuation lines are padded so every argument lines up under the first one.
+            string separator = ", \n" + new string(' ', funcPrefix.Length);
+
+            return funcPrefix + string.Join(separator, args) + ")";
+        }
+
+        /// <summary>
+        /// Emits the statements of a block and of every nested block reached by the visitor.
+        /// </summary>
+        /// <param name="context">Code generation context that receives the output</param>
+        /// <param name="block">Root block to walk</param>
+        /// <param name="isMainFunction">True if the block belongs to the shader entry point</param>
+        /// <exception cref="InvalidOperationException">An unexpected block or node type was found</exception>
+        private static void PrintBlock(CodeGenContext context, AstBlock block, bool isMainFunction)
+        {
+            AstBlockVisitor visitor = new(block);
+
+            // Entering a nested block writes its header line and opens a scope.
+            visitor.BlockEntered += (sender, e) =>
+            {
+                string header = e.Block.Type switch
+                {
+                    AstBlockType.DoWhile => "do",
+                    AstBlockType.Else => "else",
+                    AstBlockType.ElseIf => $"else if ({GetCondExpr(context, e.Block.Condition)})",
+                    AstBlockType.If => $"if ({GetCondExpr(context, e.Block.Condition)})",
+                    _ => throw new InvalidOperationException($"Found unexpected block type \"{e.Block.Type}\"."),
+                };
+
+                context.AppendLine(header);
+                context.EnterScope();
+            };
+
+            // Leaving a block closes its scope; do-while loops get their condition afterwards.
+            visitor.BlockLeft += (sender, e) =>
+            {
+                context.LeaveScope();
+
+                if (e.Block.Type == AstBlockType.DoWhile)
+                {
+                    context.AppendLine($"while ({GetCondExpr(context, e.Block.Condition)});");
+                }
+            };
+
+            bool supportsBarrierDivergence = context.HostCapabilities.SupportsShaderBarrierDivergence;
+            bool mayHaveReturned = false;
+
+            foreach (IAstNode node in visitor.Visit())
+            {
+                switch (node)
+                {
+                    case AstOperation operation:
+                        {
+                            if (!supportsBarrierDivergence)
+                            {
+                                if (operation.Inst == IntermediateRepresentation.Instruction.Barrier)
+                                {
+                                    // Barrier on divergent control flow paths may cause the GPU to hang,
+                                    // so only keep barriers on the main block of the main function,
+                                    // and only while no return has been seen yet.
+                                    bool isOnUniformPath = visitor.Block.Type == AstBlockType.Main && !mayHaveReturned && isMainFunction;
+
+                                    if (!isOnUniformPath)
+                                    {
+                                        context.Logger.Log($"Shader has barrier on potentially divergent block, the barrier will be removed.");
+
+                                        continue;
+                                    }
+                                }
+                                else if (operation.Inst == IntermediateRepresentation.Instruction.Return)
+                                {
+                                    mayHaveReturned = true;
+                                }
+                            }
+
+                            string statement = InstGen.GetExpression(context, operation);
+
+                            if (statement != null)
+                            {
+                                context.AppendLine(statement + ";");
+                            }
+
+                            break;
+                        }
+
+                    case AstAssignment assignment:
+                        {
+                            AggregateType dstType = OperandManager.GetNodeDestType(context, assignment.Destination);
+                            AggregateType srcType = OperandManager.GetNodeDestType(context, assignment.Source);
+
+                            string target = InstGen.GetExpression(context, assignment.Destination);
+                            string value = ReinterpretCast(context, assignment.Source, srcType, dstType);
+
+                            context.AppendLine($"{target} = {value};");
+
+                            break;
+                        }
+
+                    case AstComment comment:
+                        context.AppendLine("// " + comment.Comment);
+                        break;
+
+                    default:
+                        throw new InvalidOperationException($"Found unexpected node type \"{node?.GetType().Name ?? "null"}\".");
+                }
+            }
+        }
+
+        /// <summary>
+        /// Gets the expression for a condition node, cast from the node's own type to bool.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="cond">Condition node</param>
+        /// <returns>The condition expression text</returns>
+        private static string GetCondExpr(CodeGenContext context, IAstNode cond)
+            => ReinterpretCast(context, cond, OperandManager.GetNodeDestType(context, cond), AggregateType.Bool);
+    }
+}

Einige Dateien werden nicht angezeigt, da zu viele Dateien in diesem Diff geändert wurden.