Browse Source

Implement transform feedback emulation for hardware without native support (#5080)

* Implement transform feedback emulation for hardware without native support

* Stop doing some useless buffer updates and account for non-zero base instance

* Reduce redundant updates even more

* Update descriptor init logic to account for ResourceLayout

* Fix transform feedback and storage buffers not being updated in some cases

* Shader cache version bump

* PR feedback

* SetInstancedDrawVertexCount must be always called after UpdateState

* Minor typo
gdkchan 2 years ago
parent
commit
eb0bb36bbf

+ 3 - 0
src/Ryujinx.Graphics.GAL/Capabilities.cs

@@ -28,6 +28,7 @@ namespace Ryujinx.Graphics.GAL
         public readonly bool SupportsFragmentShaderOrderingIntel;
         public readonly bool SupportsGeometryShader;
         public readonly bool SupportsGeometryShaderPassthrough;
+        public readonly bool SupportsTransformFeedback;
         public readonly bool SupportsImageLoadFormatted;
         public readonly bool SupportsLayerVertexTessellation;
         public readonly bool SupportsMismatchingViewFormat;
@@ -77,6 +78,7 @@ namespace Ryujinx.Graphics.GAL
             bool supportsFragmentShaderOrderingIntel,
             bool supportsGeometryShader,
             bool supportsGeometryShaderPassthrough,
+            bool supportsTransformFeedback,
             bool supportsImageLoadFormatted,
             bool supportsLayerVertexTessellation,
             bool supportsMismatchingViewFormat,
@@ -122,6 +124,7 @@ namespace Ryujinx.Graphics.GAL
             SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel;
             SupportsGeometryShader = supportsGeometryShader;
             SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
+            SupportsTransformFeedback = supportsTransformFeedback;
             SupportsImageLoadFormatted = supportsImageLoadFormatted;
             SupportsLayerVertexTessellation = supportsLayerVertexTessellation;
             SupportsMismatchingViewFormat = supportsMismatchingViewFormat;

+ 12 - 0
src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs

@@ -539,6 +539,14 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
 
             engine.UpdateState();
 
+            if (instanceCount > 1)
+            {
+                // Must be called after UpdateState as it assumes the shader state
+                // has already been set, and that bindings have been updated already.
+
+                _channel.BufferManager.SetInstancedDrawVertexCount(count);
+            }
+
             if (indexed)
             {
                 _context.Renderer.Pipeline.DrawIndexed(count, instanceCount, firstIndex, firstVertex, firstInstance);
@@ -676,6 +684,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
                         _channel.BufferManager.SetIndexBuffer(br, IndexType.UInt);
                     }
 
+                    _channel.BufferManager.SetInstancedDrawVertexCount(_instancedIndexCount);
+
                     _context.Renderer.Pipeline.DrawIndexed(
                         _instancedIndexCount,
                         _instanceIndex + 1,
@@ -685,6 +695,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
                 }
                 else
                 {
+                    _channel.BufferManager.SetInstancedDrawVertexCount(_instancedDrawStateCount);
+
                     _context.Renderer.Pipeline.Draw(
                         _instancedDrawStateCount,
                         _instanceIndex + 1,

+ 17 - 1
src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs

@@ -269,7 +269,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
                 _prevFirstVertex = _state.State.FirstVertex;
             }
 
-            bool tfEnable = _state.State.TfEnable;
+            bool tfEnable = _state.State.TfEnable && _context.Capabilities.SupportsTransformFeedback;
 
             if (!tfEnable && _prevTfEnable)
             {
@@ -1367,6 +1367,22 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
             _vsUsesDrawParameters = gs.Shaders[1]?.Info.UsesDrawParameters ?? false;
             _vsClipDistancesWritten = gs.Shaders[1]?.Info.ClipDistancesWritten ?? 0;
 
+            bool hasTransformFeedback = gs.SpecializationState.TransformFeedbackDescriptors != null;
+            if (hasTransformFeedback != _channel.BufferManager.HasTransformFeedbackOutputs)
+            {
+                if (!_context.Capabilities.SupportsTransformFeedback)
+                {
+                    // If host does not support transform feedback, and the shader changed,
+                    // we might need to update bindings as transform feedback emulation
+                    // uses storage buffer bindings that might have been used for something
+                    // else in a previous draw.
+
+                    _channel.BufferManager.ForceTransformFeedbackAndStorageBuffersDirty();
+                }
+
+                _channel.BufferManager.HasTransformFeedbackOutputs = hasTransformFeedback;
+            }
+
             if (oldVsClipDistancesWritten != _vsClipDistancesWritten)
             {
                 UpdateUserClipState();

+ 101 - 9
src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs

@@ -6,6 +6,7 @@ using Ryujinx.Graphics.Shader;
 using System;
 using System.Collections.Generic;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 
 namespace Ryujinx.Graphics.Gpu.Memory
 {
@@ -14,12 +15,17 @@ namespace Ryujinx.Graphics.Gpu.Memory
     /// </summary>
     class BufferManager
     {
+        private const int TfInfoVertexCountOffset = Constants.TotalTransformFeedbackBuffers * sizeof(int);
+        private const int TfInfoBufferSize = TfInfoVertexCountOffset + sizeof(int);
+
         private readonly GpuContext _context;
         private readonly GpuChannel _channel;
 
         private int _unalignedStorageBuffers;
         public bool HasUnalignedStorageBuffers => _unalignedStorageBuffers > 0;
 
+        public bool HasTransformFeedbackOutputs { get; set; }
+
         private IndexBuffer _indexBuffer;
         private readonly VertexBuffer[] _vertexBuffers;
         private readonly BufferBounds[] _transformFeedbackBuffers;
@@ -98,6 +104,9 @@ namespace Ryujinx.Graphics.Gpu.Memory
         private readonly BuffersPerStage[] _gpStorageBuffers;
         private readonly BuffersPerStage[] _gpUniformBuffers;
 
+        private BufferHandle _tfInfoBuffer;
+        private int[] _tfInfoData;
+
         private bool _gpStorageBuffersDirty;
         private bool _gpUniformBuffersDirty;
 
@@ -137,6 +146,11 @@ namespace Ryujinx.Graphics.Gpu.Memory
             _bufferTextures = new List<BufferTextureBinding>();
 
             _ranges = new BufferAssignment[Constants.TotalGpUniformBuffers * Constants.ShaderStages];
+
+            if (!context.Capabilities.SupportsTransformFeedback)
+            {
+                _tfInfoData = new int[Constants.TotalTransformFeedbackBuffers];
+            }
         }
 
 
@@ -319,6 +333,31 @@ namespace Ryujinx.Graphics.Gpu.Memory
             _gpUniformBuffersDirty = true;
         }
 
+        /// <summary>
+        /// Writes the number of vertices per instance of an instanced draw into the transform feedback info buffer. Used for transform feedback emulation; does nothing if the host supports transform feedback, there are no transform feedback outputs, or the info buffer was not created yet.
+        /// </summary>
+        /// <param name="vertexCount">Vertex count per instance</param>
+        public void SetInstancedDrawVertexCount(int vertexCount)
+        {
+            if (!_context.Capabilities.SupportsTransformFeedback &&
+                HasTransformFeedbackOutputs &&
+                _tfInfoBuffer != BufferHandle.Null)
+            {
+                Span<byte> data = stackalloc byte[sizeof(int)];
+                MemoryMarshal.Cast<byte, int>(data)[0] = vertexCount;
+                _context.Renderer.SetBufferData(_tfInfoBuffer, TfInfoVertexCountOffset, data);
+            }
+        }
+
+        /// <summary>
+        /// Flags both the transform feedback buffers and the storage buffers as dirty, forcing them to be updated on the next draw.
+        /// </summary>
+        public void ForceTransformFeedbackAndStorageBuffersDirty()
+        {
+            _transformFeedbackBuffersDirty = true;
+            _gpStorageBuffersDirty = true;
+        }
+
         /// <summary>
         /// Sets the binding points for the storage buffers bound on the compute pipeline.
         /// </summary>
@@ -537,22 +576,75 @@ namespace Ryujinx.Graphics.Gpu.Memory
             {
                 _transformFeedbackBuffersDirty = false;
 
-                Span<BufferRange> tfbs = stackalloc BufferRange[Constants.TotalTransformFeedbackBuffers];
-
-                for (int index = 0; index < Constants.TotalTransformFeedbackBuffers; index++)
+                if (_context.Capabilities.SupportsTransformFeedback)
                 {
-                    BufferBounds tfb = _transformFeedbackBuffers[index];
+                    Span<BufferRange> tfbs = stackalloc BufferRange[Constants.TotalTransformFeedbackBuffers];
 
-                    if (tfb.Address == 0)
+                    for (int index = 0; index < Constants.TotalTransformFeedbackBuffers; index++)
                     {
-                        tfbs[index] = BufferRange.Empty;
-                        continue;
+                        BufferBounds tfb = _transformFeedbackBuffers[index];
+
+                        if (tfb.Address == 0)
+                        {
+                            tfbs[index] = BufferRange.Empty;
+                            continue;
+                        }
+
+                        tfbs[index] = bufferCache.GetBufferRange(tfb.Address, tfb.Size, write: true);
                     }
 
-                    tfbs[index] = bufferCache.GetBufferRange(tfb.Address, tfb.Size, write: true);
+                    _context.Renderer.Pipeline.SetTransformFeedbackBuffers(tfbs);
                 }
+                else if (HasTransformFeedbackOutputs)
+                {
+                    Span<int> info = _tfInfoData.AsSpan();
+                    Span<BufferAssignment> buffers = stackalloc BufferAssignment[Constants.TotalTransformFeedbackBuffers + 1];
+
+                    bool needsDataUpdate = false;
+
+                    if (_tfInfoBuffer == BufferHandle.Null)
+                    {
+                        _tfInfoBuffer = _context.Renderer.CreateBuffer(TfInfoBufferSize);
+                    }
+
+                    buffers[0] = new BufferAssignment(0, new BufferRange(_tfInfoBuffer, 0, TfInfoBufferSize));
+
+                    int alignment = _context.Capabilities.StorageBufferOffsetAlignment;
+
+                    for (int index = 0; index < Constants.TotalTransformFeedbackBuffers; index++)
+                    {
+                        BufferBounds tfb = _transformFeedbackBuffers[index];
+
+                        if (tfb.Address == 0)
+                        {
+                            buffers[1 + index] = new BufferAssignment(1 + index, BufferRange.Empty);
+                        }
+                        else
+                        {
+                            ulong endAddress = tfb.Address + tfb.Size;
+                            ulong address = BitUtils.AlignDown(tfb.Address, (ulong)alignment);
+                            ulong size = endAddress - address;
+
+                            int tfeOffset = ((int)tfb.Address & (alignment - 1)) / 4;
+
+                            if (info[index] != tfeOffset)
+                            {
+                                info[index] = tfeOffset;
+                                needsDataUpdate = true;
+                            }
+
+                            buffers[1 + index] = new BufferAssignment(1 + index, bufferCache.GetBufferRange(address, size, write: true));
+                        }
+                    }
 
-                _context.Renderer.Pipeline.SetTransformFeedbackBuffers(tfbs);
+                    if (needsDataUpdate)
+                    {
+                        Span<byte> infoData = MemoryMarshal.Cast<int, byte>(info);
+                        _context.Renderer.SetBufferData(_tfInfoBuffer, 0, infoData);
+                    }
+
+                    _context.Renderer.Pipeline.SetStorageBuffers(buffers);
+                }
             }
             else
             {

+ 1 - 1
src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs

@@ -37,7 +37,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             ShaderSpecializationState oldSpecState,
             ShaderSpecializationState newSpecState,
             ResourceCounts counts,
-            int stageIndex) : base(context, counts, stageIndex)
+            int stageIndex) : base(context, counts, stageIndex, oldSpecState.TransformFeedbackDescriptors != null)
         {
             _data = data;
             _cb1Data = cb1Data;

+ 6 - 2
src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs

@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         private const ushort FileFormatVersionMajor = 1;
         private const ushort FileFormatVersionMinor = 2;
         private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
-        private const uint CodeGenVersion = 5044;
+        private const uint CodeGenVersion = 5080;
 
         private const string SharedTocFileName = "shared.toc";
         private const string SharedDataFileName = "shared.data";
@@ -368,7 +368,11 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
 
                         if (hostCode != null)
                         {
-                            ShaderInfo shaderInfo = ShaderInfoBuilder.BuildForCache(context, shaders, specState.PipelineState);
+                            ShaderInfo shaderInfo = ShaderInfoBuilder.BuildForCache(
+                                context,
+                                shaders,
+                                specState.PipelineState,
+                                specState.TransformFeedbackDescriptors != null);
 
                             IProgram hostProgram;
 

+ 1 - 1
src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs

@@ -491,7 +491,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
             {
                 ShaderSource[] shaderSources = new ShaderSource[compilation.TranslatedStages.Length];
 
-                ShaderInfoBuilder shaderInfoBuilder = new ShaderInfoBuilder(_context);
+                ShaderInfoBuilder shaderInfoBuilder = new ShaderInfoBuilder(_context, compilation.SpecializationState.TransformFeedbackDescriptors != null);
 
                 for (int index = 0; index < compilation.TranslatedStages.Length; index++)
                 {

+ 2 - 2
src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs

@@ -30,7 +30,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
             GpuContext context,
             GpuChannel channel,
             GpuAccessorState state,
-            int stageIndex) : base(context, state.ResourceCounts, stageIndex)
+            int stageIndex) : base(context, state.ResourceCounts, stageIndex, state.TransformFeedbackDescriptors != null)
         {
             _isVulkan = context.Capabilities.Api == TargetApi.Vulkan;
             _channel = channel;
@@ -44,7 +44,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="context">GPU context</param>
         /// <param name="channel">GPU channel</param>
         /// <param name="state">Current GPU state</param>
-        public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context, state.ResourceCounts, 0)
+        public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context, state.ResourceCounts, 0, false)
         {
             _channel = channel;
             _state = state;

+ 24 - 6
src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs

@@ -17,40 +17,56 @@ namespace Ryujinx.Graphics.Gpu.Shader
         private readonly ResourceCounts _resourceCounts;
         private readonly int _stageIndex;
 
+        private readonly int _reservedConstantBuffers;
+        private readonly int _reservedStorageBuffers;
+
         /// <summary>
         /// Creates a new GPU accessor.
         /// </summary>
         /// <param name="context">GPU context</param>
-        public GpuAccessorBase(GpuContext context, ResourceCounts resourceCounts, int stageIndex)
+        /// <param name="resourceCounts">Counter of GPU resources used by the shader</param>
+        /// <param name="stageIndex">Index of the shader stage, 0 for compute</param>
+        /// <param name="tfEnabled">Indicates if the current graphics shader is used with transform feedback enabled</param>
+        public GpuAccessorBase(GpuContext context, ResourceCounts resourceCounts, int stageIndex, bool tfEnabled)
         {
             _context = context;
             _resourceCounts = resourceCounts;
             _stageIndex = stageIndex;
+
+            _reservedConstantBuffers = 1; // For the support buffer.
+            _reservedStorageBuffers = !context.Capabilities.SupportsTransformFeedback && tfEnabled ? 5 : 0;
         }
 
         public int QueryBindingConstantBuffer(int index)
         {
+            int binding;
+
             if (_context.Capabilities.Api == TargetApi.Vulkan)
             {
-                // We need to start counting from 1 since binding 0 is reserved for the support uniform buffer.
-                return GetBindingFromIndex(index, _context.Capabilities.MaximumUniformBuffersPerStage, "Uniform buffer") + 1;
+                binding = GetBindingFromIndex(index, _context.Capabilities.MaximumUniformBuffersPerStage, "Uniform buffer");
             }
             else
             {
-                return _resourceCounts.UniformBuffersCount++;
+                binding = _resourceCounts.UniformBuffersCount++;
             }
+
+            return binding + _reservedConstantBuffers;
         }
 
         public int QueryBindingStorageBuffer(int index)
         {
+            int binding;
+
             if (_context.Capabilities.Api == TargetApi.Vulkan)
             {
-                return GetBindingFromIndex(index, _context.Capabilities.MaximumStorageBuffersPerStage, "Storage buffer");
+                binding = GetBindingFromIndex(index, _context.Capabilities.MaximumStorageBuffersPerStage, "Storage buffer");
             }
             else
             {
-                return _resourceCounts.StorageBuffersCount++;
+                binding = _resourceCounts.StorageBuffersCount++;
             }
+
+            return binding + _reservedStorageBuffers;
         }
 
         public int QueryBindingTexture(int index, bool isBuffer)
@@ -149,6 +165,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
         public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod;
 
+        public bool QueryHostSupportsTransformFeedback() => _context.Capabilities.SupportsTransformFeedback;
+
         public bool QueryHostSupportsViewportIndexVertexTessellation() => _context.Capabilities.SupportsViewportIndexVertexTessellation;
 
         public bool QueryHostSupportsViewportMask() => _context.Capabilities.SupportsViewportMask;

+ 0 - 8
src/Ryujinx.Graphics.Gpu/Shader/ResourceCounts.cs

@@ -24,13 +24,5 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// Total of images used by the shaders.
         /// </summary>
         public int ImagesCount;
-
-        /// <summary>
-        /// Creates a new instance of the shader resource counts class.
-        /// </summary>
-        public ResourceCounts()
-        {
-            UniformBuffersCount = 1; // The first binding is reserved for the support buffer.
-        }
     }
 }

+ 1 - 1
src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs

@@ -362,7 +362,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             TranslatorContext previousStage = null;
 
-            ShaderInfoBuilder infoBuilder = new ShaderInfoBuilder(_context);
+            ShaderInfoBuilder infoBuilder = new ShaderInfoBuilder(_context, transformFeedbackDescriptors != null);
 
             for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
             {

+ 55 - 8
src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs

@@ -16,15 +16,24 @@ namespace Ryujinx.Graphics.Gpu.Shader
         private const int TextureSetIndex = 2;
         private const int ImageSetIndex = 3;
 
-        private const ResourceStages SupportBufferStags =
+        private const ResourceStages SupportBufferStages =
             ResourceStages.Compute |
             ResourceStages.Vertex |
             ResourceStages.Fragment;
 
+        private const ResourceStages VtgStages =
+            ResourceStages.Vertex |
+            ResourceStages.TessellationControl |
+            ResourceStages.TessellationEvaluation |
+            ResourceStages.Geometry;
+
         private readonly GpuContext _context;
 
         private int _fragmentOutputMap;
 
+        private readonly int _reservedConstantBuffers;
+        private readonly int _reservedStorageBuffers;
+
         private readonly List<ResourceDescriptor>[] _resourceDescriptors;
         private readonly List<ResourceUsage>[] _resourceUsages;
 
@@ -32,7 +41,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// Creates a new shader info builder.
         /// </summary>
         /// <param name="context">GPU context that owns the shaders that will be added to the builder</param>
-        public ShaderInfoBuilder(GpuContext context)
+        /// <param name="tfEnabled">Indicates if the graphics shader is used with transform feedback enabled</param>
+        public ShaderInfoBuilder(GpuContext context, bool tfEnabled)
         {
             _context = context;
 
@@ -47,7 +57,22 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 _resourceUsages[index] = new();
             }
 
-            AddDescriptor(SupportBufferStags, ResourceType.UniformBuffer, UniformSetIndex, 0, 1);
+            AddDescriptor(SupportBufferStages, ResourceType.UniformBuffer, UniformSetIndex, 0, 1);
+
+            _reservedConstantBuffers = 1; // For the support buffer.
+
+            if (!context.Capabilities.SupportsTransformFeedback && tfEnabled)
+            {
+                _reservedStorageBuffers = 5;
+
+                AddDescriptor(VtgStages, ResourceType.StorageBuffer, StorageSetIndex, 0, 5);
+                AddUsage(VtgStages, ResourceType.StorageBuffer, ResourceAccess.Read, StorageSetIndex, 0, 1);
+                AddUsage(VtgStages, ResourceType.StorageBuffer, ResourceAccess.Write, StorageSetIndex, 1, 4);
+            }
+            else
+            {
+                _reservedStorageBuffers = 0;
+            }
         }
 
         /// <summary>
@@ -86,8 +111,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
             int texturesPerStage = (int)_context.Capabilities.MaximumTexturesPerStage;
             int imagesPerStage = (int)_context.Capabilities.MaximumImagesPerStage;
 
-            int uniformBinding = 1 + stageIndex * uniformsPerStage;
-            int storageBinding = stageIndex * storagesPerStage;
+            int uniformBinding = _reservedConstantBuffers + stageIndex * uniformsPerStage;
+            int storageBinding = _reservedStorageBuffers + stageIndex * storagesPerStage;
             int textureBinding = stageIndex * texturesPerStage * 2;
             int imageBinding = stageIndex * imagesPerStage * 2;
 
@@ -133,6 +158,23 @@ namespace Ryujinx.Graphics.Gpu.Shader
             AddDescriptor(stages, type2, setIndex, binding + count, count);
         }
 
+        /// <summary>
+        /// Adds usage information for a range of consecutive bindings to the list of usages.
+        /// </summary>
+        /// <param name="stages">Shader stages where the resource is used</param>
+        /// <param name="type">Type of the resource</param>
+        /// <param name="access">How the resource is accessed by the shader stages where it is used</param>
+        /// <param name="setIndex">Descriptor set number where the resource will be bound</param>
+        /// <param name="binding">First binding number of the range where the resources will be bound</param>
+        /// <param name="count">Number of consecutive bindings, starting at <paramref name="binding"/>, that get a usage entry</param>
+        private void AddUsage(ResourceStages stages, ResourceType type, ResourceAccess access, int setIndex, int binding, int count)
+        {
+            for (int index = 0; index < count; index++)
+            {
+                _resourceUsages[setIndex].Add(new ResourceUsage(binding + index, type, stages, access));
+            }
+        }
+
         /// <summary>
         /// Adds buffer usage information to the list of usages.
         /// </summary>
@@ -212,10 +254,15 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="context">GPU context that owns the shaders</param>
         /// <param name="programs">Shaders from the disk cache</param>
         /// <param name="pipeline">Optional pipeline for background compilation</param>
+        /// <param name="tfEnabled">Indicates if the graphics shader is used with transform feedback enabled</param>
         /// <returns>Shader information</returns>
-        public static ShaderInfo BuildForCache(GpuContext context, IEnumerable<CachedShaderStage> programs, ProgramPipelineState? pipeline)
+        public static ShaderInfo BuildForCache(
+            GpuContext context,
+            IEnumerable<CachedShaderStage> programs,
+            ProgramPipelineState? pipeline,
+            bool tfEnabled)
         {
-            ShaderInfoBuilder builder = new ShaderInfoBuilder(context);
+            ShaderInfoBuilder builder = new ShaderInfoBuilder(context, tfEnabled);
 
             foreach (CachedShaderStage program in programs)
             {
@@ -237,7 +284,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <returns>Shader information</returns>
         public static ShaderInfo BuildForCompute(GpuContext context, ShaderProgramInfo info, bool fromCache = false)
         {
-            ShaderInfoBuilder builder = new ShaderInfoBuilder(context);
+            ShaderInfoBuilder builder = new ShaderInfoBuilder(context, tfEnabled: false);
 
             builder.AddStageInfo(info);
 

+ 1 - 0
src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs

@@ -153,6 +153,7 @@ namespace Ryujinx.Graphics.OpenGL
                 supportsFragmentShaderOrderingIntel: HwCapabilities.SupportsFragmentShaderOrdering,
                 supportsGeometryShader: true,
                 supportsGeometryShaderPassthrough: HwCapabilities.SupportsGeometryShaderPassthrough,
+                supportsTransformFeedback: true,
                 supportsImageLoadFormatted: HwCapabilities.SupportsImageLoadFormatted,
                 supportsLayerVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
                 supportsMismatchingViewFormat: HwCapabilities.SupportsMismatchingViewFormat,

+ 6 - 0
src/Ryujinx.Graphics.Shader/Constants.cs

@@ -10,5 +10,11 @@ namespace Ryujinx.Graphics.Shader
         public const int NvnBaseVertexByteOffset = 0x640;
         public const int NvnBaseInstanceByteOffset = 0x644;
         public const int NvnDrawIndexByteOffset = 0x648;
+
+        // Transform Feedback emulation.
+
+        public const int TfeInfoBinding = 0;
+        public const int TfeBufferBaseBinding = 1;
+        public const int TfeBuffersCount = 4;
     }
 }

+ 9 - 0
src/Ryujinx.Graphics.Shader/IGpuAccessor.cs

@@ -367,6 +367,15 @@ namespace Ryujinx.Graphics.Shader
             return true;
         }
 
+        /// <summary>
+        /// Queries host GPU transform feedback support.
+        /// </summary>
+        /// <returns>True if the GPU and driver support transform feedback, false otherwise (the default implementation always returns true)</returns>
+        bool QueryHostSupportsTransformFeedback()
+        {
+            return true;
+        }
+
         /// <summary>
         /// Queries host support for writes to the viewport index from vertex or tessellation shader stages.
         /// </summary>

+ 39 - 0
src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs

@@ -234,6 +234,45 @@ namespace Ryujinx.Graphics.Shader.Translation
 
         public void PrepareForVertexReturn()
         {
+            if (!Config.GpuAccessor.QueryHostSupportsTransformFeedback() && Config.GpuAccessor.QueryTransformFeedbackEnabled())
+            {
+                Operand vertexCount = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(1));
+
+                for (int tfbIndex = 0; tfbIndex < Constants.TfeBuffersCount; tfbIndex++)
+                {
+                    var locations = Config.GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex);
+                    var stride = Config.GpuAccessor.QueryTransformFeedbackStride(tfbIndex);
+
+                    Operand baseOffset = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(0), Const(tfbIndex));
+                    Operand baseVertex = this.Load(StorageKind.Input, IoVariable.BaseVertex);
+                    Operand baseInstance = this.Load(StorageKind.Input, IoVariable.BaseInstance);
+                    Operand vertexIndex = this.Load(StorageKind.Input, IoVariable.VertexIndex);
+                    Operand instanceIndex = this.Load(StorageKind.Input, IoVariable.InstanceIndex);
+
+                    Operand outputVertexOffset = this.ISubtract(vertexIndex, baseVertex);
+                    Operand outputInstanceOffset = this.ISubtract(instanceIndex, baseInstance);
+
+                    Operand outputBaseVertex = this.IMultiply(outputInstanceOffset, vertexCount);
+
+                    Operand vertexOffset = this.IMultiply(this.IAdd(outputBaseVertex, outputVertexOffset), Const(stride / 4));
+                    baseOffset = this.IAdd(baseOffset, vertexOffset);
+
+                    for (int j = 0; j < locations.Length; j++)
+                    {
+                        byte location = locations[j];
+                        if (location == 0xff)
+                        {
+                            continue;
+                        }
+
+                        Operand offset = this.IAdd(baseOffset, Const(j));
+                        Operand value = Instructions.AttributeMap.GenerateAttributeLoad(this, null, location * 4, isOutput: true, isPerPatch: false);
+
+                        this.Store(StorageKind.StorageBuffer, Constants.TfeBufferBaseBinding + tfbIndex, Const(0), offset, value);
+                    }
+                }
+            }
+
             if (Config.GpuAccessor.QueryViewportTransformDisable())
             {
                 Operand x = this.Load(StorageKind.Output, IoVariable.Position, null, Const(0));

+ 30 - 2
src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs

@@ -132,6 +132,11 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             _transformFeedbackDefinitions = new Dictionary<TransformFeedbackVariable, TransformFeedbackOutput>();
 
+            TransformFeedbackEnabled =
+                stage != ShaderStage.Compute &&
+                gpuAccessor.QueryTransformFeedbackEnabled() &&
+                gpuAccessor.QueryHostSupportsTransformFeedback();
+
             UsedInputAttributesPerPatch  = new HashSet<int>();
             UsedOutputAttributesPerPatch = new HashSet<int>();
 
@@ -139,6 +144,31 @@ namespace Ryujinx.Graphics.Shader.Translation
             _usedImages   = new Dictionary<TextureInfo, TextureMeta>();
 
             ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties());
+
+            if (!gpuAccessor.QueryHostSupportsTransformFeedback() && gpuAccessor.QueryTransformFeedbackEnabled())
+            {
+                StructureType tfeInfoStruct = new StructureType(new StructureField[]
+                {
+                    new StructureField(AggregateType.Array | AggregateType.U32, "base_offset", 4),
+                    new StructureField(AggregateType.U32, "vertex_count")
+                });
+
+                BufferDefinition tfeInfoBuffer = new BufferDefinition(BufferLayout.Std430, 1, Constants.TfeInfoBinding, "tfe_info", tfeInfoStruct);
+
+                Properties.AddStorageBuffer(Constants.TfeInfoBinding, tfeInfoBuffer);
+
+                StructureType tfeDataStruct = new StructureType(new StructureField[]
+                {
+                    new StructureField(AggregateType.Array | AggregateType.U32, "data", 0)
+                });
+
+                for (int i = 0; i < Constants.TfeBuffersCount; i++)
+                {
+                    int binding = Constants.TfeBufferBaseBinding + i;
+                    BufferDefinition tfeDataBuffer = new BufferDefinition(BufferLayout.Std430, 1, binding, $"tfe_data{i}", tfeDataStruct);
+                    Properties.AddStorageBuffer(binding, tfeDataBuffer);
+                }
+            }
         }
 
         public ShaderConfig(
@@ -151,7 +181,6 @@ namespace Ryujinx.Graphics.Shader.Translation
             ThreadsPerInputPrimitive = 1;
             OutputTopology           = outputTopology;
             MaxOutputVertices        = maxOutputVertices;
-            TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
         }
 
         public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(header.Stage, gpuAccessor, options)
@@ -165,7 +194,6 @@ namespace Ryujinx.Graphics.Shader.Translation
             OmapTargets              = header.OmapTargets;
             OmapSampleMask           = header.OmapSampleMask;
             OmapDepth                = header.OmapDepth;
-            TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
             LastInVertexPipeline     = header.Stage < ShaderStage.Fragment;
         }
 

+ 2 - 2
src/Ryujinx.Graphics.Vulkan/DescriptorSetCollection.cs

@@ -16,9 +16,9 @@ namespace Ryujinx.Graphics.Vulkan
             _descriptorSets = descriptorSets;
         }
 
-        public void InitializeBuffers(int setIndex, int baseBinding, int countPerUnit, DescriptorType type, VkBuffer dummyBuffer)
+        public void InitializeBuffers(int setIndex, int baseBinding, int count, DescriptorType type, VkBuffer dummyBuffer)
         {
-            Span<DescriptorBufferInfo> infos = stackalloc DescriptorBufferInfo[countPerUnit];
+            Span<DescriptorBufferInfo> infos = stackalloc DescriptorBufferInfo[count];
 
             infos.Fill(new DescriptorBufferInfo()
             {

+ 8 - 25
src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs

@@ -596,36 +596,19 @@ namespace Ryujinx.Graphics.Vulkan
             }
         }
 
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private void Initialize(CommandBufferScoped cbs, int setIndex, DescriptorSetCollection dsc)
         {
-            var dummyBuffer = _dummyBuffer?.GetBuffer().Get(cbs).Value ?? default;
+            // We don't support clearing texture descriptors currently.
+            if (setIndex != PipelineBase.UniformSetIndex && setIndex != PipelineBase.StorageSetIndex)
+            {
+                return;
+            }
 
-            uint stages = _program.Stages;
+            var dummyBuffer = _dummyBuffer?.GetBuffer().Get(cbs).Value ?? default; // Falls back to the default handle when no dummy buffer exists.
 
-            while (stages != 0)
+            foreach (ResourceBindingSegment segment in _program.ClearSegments[setIndex]) // Walk the program's clear segments for this set instead of fixed per-stage binding ranges.
             {
-                int stage = BitOperations.TrailingZeroCount(stages);
-                stages &= ~(1u << stage);
-
-                if (setIndex == PipelineBase.UniformSetIndex)
-                {
-                    dsc.InitializeBuffers(
-                        0,
-                        1 + stage * Constants.MaxUniformBuffersPerStage,
-                        Constants.MaxUniformBuffersPerStage,
-                        DescriptorType.UniformBuffer,
-                        dummyBuffer);
-                }
-                else if (setIndex == PipelineBase.StorageSetIndex)
-                {
-                    dsc.InitializeBuffers(
-                        0,
-                        stage * Constants.MaxStorageBuffersPerStage,
-                        Constants.MaxStorageBuffersPerStage,
-                        DescriptorType.StorageBuffer,
-                        dummyBuffer);
-                }
+                dsc.InitializeBuffers(0, segment.Binding, segment.Count, segment.Type.Convert(), dummyBuffer); // Initializes segment.Count bindings starting at segment.Binding with the dummy buffer.
             }
         }
 

+ 56 - 0
src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs

@@ -24,6 +24,7 @@ namespace Ryujinx.Graphics.Vulkan
 
         public uint Stages { get; }
 
+        public ResourceBindingSegment[][] ClearSegments { get; }
         public ResourceBindingSegment[][] BindingSegments { get; }
 
         public ProgramLinkStatus LinkStatus { get; private set; }
@@ -115,6 +116,7 @@ namespace Ryujinx.Graphics.Vulkan
 
             Stages = stages;
 
+            ClearSegments = BuildClearSegments(resourceLayout.Sets);
             BindingSegments = BuildBindingSegments(resourceLayout.SetUsages);
 
             _compileTask = Task.CompletedTask;
@@ -135,6 +137,60 @@ namespace Ryujinx.Graphics.Vulkan
             _firstBackgroundUse = !fromCache;
         }
 
+        private static ResourceBindingSegment[][] BuildClearSegments(ReadOnlyCollection<ResourceDescriptorCollection> sets)
+        { // Builds one array of segments per descriptor set, merging adjacent compatible descriptors into runs.
+            ResourceBindingSegment[][] segments = new ResourceBindingSegment[sets.Count][]; // Result is indexed by set index.
+
+            for (int setIndex = 0; setIndex < sets.Count; setIndex++)
+            {
+                List<ResourceBindingSegment> currentSegments = new List<ResourceBindingSegment>();
+
+                ResourceDescriptor currentDescriptor = default; // First descriptor of the run being accumulated.
+                int currentCount = 0; // Bindings accumulated in the current run; 0 means no run is open yet.
+
+                for (int index = 0; index < sets[setIndex].Descriptors.Count; index++)
+                {
+                    ResourceDescriptor descriptor = sets[setIndex].Descriptors[index];
+
+                    if (currentDescriptor.Binding + currentCount != descriptor.Binding || // Not contiguous with the current run...
+                        currentDescriptor.Type != descriptor.Type ||
+                        currentDescriptor.Stages != descriptor.Stages) // ...or differs in type or stages: start a new run.
+                    {
+                        if (currentCount != 0) // Flush the finished run before starting the next one.
+                        {
+                            currentSegments.Add(new ResourceBindingSegment(
+                                currentDescriptor.Binding,
+                                currentCount,
+                                currentDescriptor.Type,
+                                currentDescriptor.Stages,
+                                ResourceAccess.ReadWrite));
+                        }
+
+                        currentDescriptor = descriptor;
+                        currentCount = descriptor.Count;
+                    }
+                    else
+                    {
+                        currentCount += descriptor.Count; // Extend the current run.
+                    }
+                }
+
+                if (currentCount != 0) // Flush the last run of this set, if any.
+                {
+                    currentSegments.Add(new ResourceBindingSegment(
+                        currentDescriptor.Binding,
+                        currentCount,
+                        currentDescriptor.Type,
+                        currentDescriptor.Stages,
+                        ResourceAccess.ReadWrite));
+                }
+
+                segments[setIndex] = currentSegments.ToArray();
+            }
+
+            return segments;
+        }
+
         private static ResourceBindingSegment[][] BuildBindingSegments(ReadOnlyCollection<ResourceUsageCollection> setUsages)
         {
             ResourceBindingSegment[][] segments = new ResourceBindingSegment[setUsages.Count][];

+ 1 - 0
src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs

@@ -589,6 +589,7 @@ namespace Ryujinx.Graphics.Vulkan
                 supportsFragmentShaderOrderingIntel: false,
                 supportsGeometryShader: Capabilities.SupportsGeometryShader,
                 supportsGeometryShaderPassthrough: Capabilities.SupportsGeometryShaderPassthrough,
+                supportsTransformFeedback: Capabilities.SupportsTransformFeedback,
                 supportsImageLoadFormatted: features2.Features.ShaderStorageImageReadWithoutFormat,
                 supportsLayerVertexTessellation: featuresVk12.ShaderOutputLayer,
                 supportsMismatchingViewFormat: true,