Эх сурвалжийг харах

Make sure attributes used on subsequent shader stages are initialized (#2538)

gdkchan 4 жил өмнө
parent
commit
ed754af8d5

+ 124 - 88
Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs

@@ -38,7 +38,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <summary>
         /// Version of the codegen (to be changed when codegen or guest format change).
         /// </summary>
-        private const ulong ShaderCodeGenVersion = 2494;
+        private const ulong ShaderCodeGenVersion = 2538;
 
         // Progress reporting helpers
         private volatile int _shaderCount;
@@ -290,81 +290,77 @@ namespace Ryujinx.Graphics.Gpu.Shader
                             {
                                 Task compileTask = Task.Run(() =>
                                 {
-                                    // Reconstruct code holder.
-                                    for (int i = 0; i < entries.Length; i++)
-                                    {
-                                        GuestShaderCacheEntry entry = entries[i];
-
-                                        if (entry == null)
-                                        {
-                                            continue;
-                                        }
+                                    TranslatorContext[] shaderContexts = null;
 
-                                        ShaderProgram program;
+                                    if (!isHostProgramValid)
+                                    {
+                                        shaderContexts = new TranslatorContext[1 + entries.Length];
 
-                                        if (entry.Header.SizeA != 0)
+                                        for (int i = 0; i < entries.Length; i++)
                                         {
-                                            ShaderProgramInfo shaderProgramInfo;
+                                            GuestShaderCacheEntry entry = entries[i];
 
-                                            if (isHostProgramValid)
+                                            if (entry == null)
                                             {
-                                                program = new ShaderProgram(entry.Header.Stage, "");
-                                                shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo();
+                                                continue;
                                             }
-                                            else
-                                            {
-                                                var binaryCode = new Memory<byte>(entry.Code);
 
-                                                var gpuAccessor = new CachedGpuAccessor(
-                                                    _context,
-                                                    binaryCode,
-                                                    binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize),
-                                                    entry.Header.GpuAccessorHeader,
-                                                    entry.TextureDescriptors);
+                                            var binaryCode = new Memory<byte>(entry.Code);
 
-                                                var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
-                                                var options2 = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags | TranslationFlags.VertexA);
+                                            var gpuAccessor = new CachedGpuAccessor(
+                                                _context,
+                                                binaryCode,
+                                                binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize),
+                                                entry.Header.GpuAccessorHeader,
+                                                entry.TextureDescriptors);
+
+                                            var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
+
+                                            shaderContexts[i + 1] = Translator.CreateContext(0, gpuAccessor, options, counts);
 
-                                                TranslatorContext translatorContext = Translator.CreateContext(0, gpuAccessor, options, counts);
-                                                TranslatorContext translatorContext2 = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, options2, counts);
+                                            if (entry.Header.SizeA != 0)
+                                            {
+                                                var options2 = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags | TranslationFlags.VertexA);
 
-                                                program = translatorContext.Translate(out shaderProgramInfo, translatorContext2);
+                                                shaderContexts[0] = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, options2, counts);
                                             }
+                                        }
+                                    }
 
-                                            // NOTE: Vertex B comes first in the shader cache.
-                                            byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray();
-                                            byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray();
+                                    // Reconstruct code holder.
+                                    for (int i = 0; i < entries.Length; i++)
+                                    {
+                                        GuestShaderCacheEntry entry = entries[i];
 
-                                            shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2);
+                                        if (entry == null)
+                                        {
+                                            continue;
+                                        }
+
+                                        ShaderProgram program;
+                                        ShaderProgramInfo shaderProgramInfo;
+
+                                        if (isHostProgramValid)
+                                        {
+                                            program = new ShaderProgram(entry.Header.Stage, "");
+                                            shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo();
                                         }
                                         else
                                         {
-                                            ShaderProgramInfo shaderProgramInfo;
+                                            int stageIndex = i + 1;
 
-                                            if (isHostProgramValid)
-                                            {
-                                                program = new ShaderProgram(entry.Header.Stage, "");
-                                                shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo();
-                                            }
-                                            else
-                                            {
-                                                var binaryCode = new Memory<byte>(entry.Code);
+                                            TranslatorContext currentStage = shaderContexts[stageIndex];
+                                            TranslatorContext nextStage = GetNextStageContext(shaderContexts, stageIndex);
+                                            TranslatorContext vertexA = stageIndex == 1 ? shaderContexts[0] : null;
 
-                                                var gpuAccessor = new CachedGpuAccessor(
-                                                    _context,
-                                                    binaryCode,
-                                                    binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize),
-                                                    entry.Header.GpuAccessorHeader,
-                                                    entry.TextureDescriptors);
+                                            program = currentStage.Translate(out shaderProgramInfo, nextStage, vertexA);
+                                        }
 
-                                                var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
-                                                program = Translator.CreateContext(0, gpuAccessor, options, counts).Translate(out shaderProgramInfo);
-                                            }
+                                        // NOTE: Vertex B comes first in the shader cache.
+                                        byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray();
+                                        byte[] code2 = entry.Header.SizeA != 0 ? entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray() : null;
 
-                                            byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray();
-
-                                            shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code);
-                                        }
+                                        shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2);
 
                                         shaderPrograms.Add(program);
                                     }
@@ -591,7 +587,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 }
 
                 // The shader isn't currently cached, translate it and compile it.
-                ShaderCodeHolder shader = TranslateShader(channel.MemoryManager, shaderContexts[0]);
+                ShaderCodeHolder shader = TranslateShader(_dumper, channel.MemoryManager, shaderContexts[0], null, null);
 
                 shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code);
 
@@ -715,11 +711,10 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 // The shader isn't currently cached, translate it and compile it.
                 ShaderCodeHolder[] shaders = new ShaderCodeHolder[Constants.ShaderStages];
 
-                shaders[0] = TranslateShader(channel.MemoryManager, shaderContexts[1], shaderContexts[0]);
-                shaders[1] = TranslateShader(channel.MemoryManager, shaderContexts[2]);
-                shaders[2] = TranslateShader(channel.MemoryManager, shaderContexts[3]);
-                shaders[3] = TranslateShader(channel.MemoryManager, shaderContexts[4]);
-                shaders[4] = TranslateShader(channel.MemoryManager, shaderContexts[5]);
+                for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
+                {
+                    shaders[stageIndex] = TranslateShader(_dumper, channel.MemoryManager, shaderContexts, stageIndex + 1);
+                }
 
                 List<IShader> hostShaders = new List<IShader>();
 
@@ -942,53 +937,94 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <summary>
         /// Translates a previously generated translator context to something that the host API accepts.
         /// </summary>
+        /// <param name="dumper">Optional shader code dumper</param>
         /// <param name="memoryManager">Memory manager used to access the GPU memory where the shader is located</param>
-        /// <param name="translatorContext">Current translator context to translate</param>
-        /// <param name="translatorContext2">Optional translator context of the shader that should be combined</param>
+        /// <param name="stages">Translator context of all available shader stages</param>
+        /// <param name="stageIndex">Index in the stages array of the stage to translate</param>
         /// <returns>Compiled graphics shader code</returns>
-        private ShaderCodeHolder TranslateShader(
+        private static ShaderCodeHolder TranslateShader(
+            ShaderDumper dumper,
             MemoryManager memoryManager,
-            TranslatorContext translatorContext,
-            TranslatorContext translatorContext2 = null)
+            TranslatorContext[] stages,
+            int stageIndex)
         {
-            if (translatorContext == null)
+            TranslatorContext currentStage = stages[stageIndex];
+            TranslatorContext nextStage = GetNextStageContext(stages, stageIndex);
+            TranslatorContext vertexA = stageIndex == 1 ? stages[0] : null;
+
+            return TranslateShader(dumper, memoryManager, currentStage, nextStage, vertexA);
+        }
+
+        /// <summary>
+        /// Gets the next shader stage context, from an array of contexts and index of the current stage.
+        /// </summary>
+        /// <param name="stages">Translator context of all available shader stages</param>
+        /// <param name="stageIndex">Index of the current stage in the stages array</param>
+        /// <returns>The translator context of the next stage, or null if there is none</returns>
+        private static TranslatorContext GetNextStageContext(TranslatorContext[] stages, int stageIndex)
+        {
+            for (int nextStageIndex = stageIndex + 1; nextStageIndex < stages.Length; nextStageIndex++)
             {
-                return null;
+                if (stages[nextStageIndex] != null)
+                {
+                    return stages[nextStageIndex];
+                }
             }
 
-            if (translatorContext2 != null)
+            return null;
+        }
+
+        /// <summary>
+        /// Translates a previously generated translator context to something that the host API accepts.
+        /// </summary>
+        /// <param name="dumper">Optional shader code dumper</param>
+        /// <param name="memoryManager">Memory manager used to access the GPU memory where the shader is located</param>
+        /// <param name="currentStage">Translator context of the stage to be translated</param>
+        /// <param name="nextStage">Translator context of the next active stage, or null if there is none</param>
+        /// <param name="vertexA">Optional translator context of the shader that should be combined</param>
+        /// <returns>Compiled graphics shader code</returns>
+        private static ShaderCodeHolder TranslateShader(
+            ShaderDumper dumper,
+            MemoryManager memoryManager,
+            TranslatorContext currentStage,
+            TranslatorContext nextStage,
+            TranslatorContext vertexA)
+        {
+            if (currentStage == null)
             {
-                byte[] codeA = memoryManager.GetSpan(translatorContext2.Address, translatorContext2.Size).ToArray();
-                byte[] codeB = memoryManager.GetSpan(translatorContext.Address, translatorContext.Size).ToArray();
+                return null;
+            }
 
-                _dumper.Dump(codeA, compute: false, out string fullPathA, out string codePathA);
-                _dumper.Dump(codeB, compute: false, out string fullPathB, out string codePathB);
+            if (vertexA != null)
+            {
+                byte[] codeA = memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray();
+                byte[] codeB = memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray();
 
-                ShaderProgram program = translatorContext.Translate(out ShaderProgramInfo shaderProgramInfo, translatorContext2);
+                ShaderDumpPaths pathsA = default;
+                ShaderDumpPaths pathsB = default;
 
-                if (fullPathA != null && fullPathB != null && codePathA != null && codePathB != null)
+                if (dumper != null)
                 {
-                    program.Prepend("// " + codePathB);
-                    program.Prepend("// " + fullPathB);
-                    program.Prepend("// " + codePathA);
-                    program.Prepend("// " + fullPathA);
+                    pathsA = dumper.Dump(codeA, compute: false);
+                    pathsB = dumper.Dump(codeB, compute: false);
                 }
 
+                ShaderProgram program = currentStage.Translate(out ShaderProgramInfo shaderProgramInfo, nextStage, vertexA);
+
+                pathsB.Prepend(program);
+                pathsA.Prepend(program);
+
                 return new ShaderCodeHolder(program, shaderProgramInfo, codeB, codeA);
             }
             else
             {
-                byte[] code = memoryManager.GetSpan(translatorContext.Address, translatorContext.Size).ToArray();
+                byte[] code = memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray();
 
-                _dumper.Dump(code, translatorContext.Stage == ShaderStage.Compute, out string fullPath, out string codePath);
+                ShaderDumpPaths paths = dumper?.Dump(code, currentStage.Stage == ShaderStage.Compute) ?? default;
 
-                ShaderProgram program = translatorContext.Translate(out ShaderProgramInfo shaderProgramInfo);
+                ShaderProgram program = currentStage.Translate(out ShaderProgramInfo shaderProgramInfo, nextStage);
 
-                if (fullPath != null && codePath != null)
-                {
-                    program.Prepend("// " + codePath);
-                    program.Prepend("// " + fullPath);
-                }
+                paths.Prepend(program);
 
                 return new ShaderCodeHolder(program, shaderProgramInfo, code);
             }

+ 49 - 0
Ryujinx.Graphics.Gpu/Shader/ShaderDumpPaths.cs

@@ -0,0 +1,49 @@
+using Ryujinx.Graphics.Shader;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+    /// <summary>
+    /// Paths where shader code was dumped on disk.
+    /// </summary>
+    struct ShaderDumpPaths
+    {
+        /// <summary>
+        /// Path where the full shader code with header was dumped, or null if not dumped.
+        /// </summary>
+        public string FullPath { get; }
+
+        /// <summary>
+        /// Path where the shader code without header was dumped, or null if not dumped.
+        /// </summary>
+        public string CodePath { get; }
+
+        /// <summary>
+        /// True if the shader was dumped, false otherwise.
+        /// </summary>
+        public bool HasPath => FullPath != null && CodePath != null;
+
+        /// <summary>
+        /// Creates a new shader dump paths structure.
+        /// </summary>
+        /// <param name="fullPath">Path where the full shader code with header was dumped, or null if not dumped</param>
+        /// <param name="codePath">Path where the shader code without header was dumped, or null if not dumped</param>
+        public ShaderDumpPaths(string fullPath, string codePath)
+        {
+            FullPath = fullPath;
+            CodePath = codePath;
+        }
+
+        /// <summary>
+        /// Prepends the shader paths on the program source, as a comment.
+        /// </summary>
+        /// <param name="program">Program to prepend into</param>
+        public void Prepend(ShaderProgram program)
+        {
+            if (HasPath)
+            {
+                program.Prepend("// " + CodePath);
+                program.Prepend("// " + FullPath);
+            }
+        }
+    }
+}

+ 8 - 10
Ryujinx.Graphics.Gpu/Shader/ShaderDumper.cs

@@ -1,4 +1,4 @@
-using System;
+using Ryujinx.Graphics.Shader;
 using System.IO;
 
 namespace Ryujinx.Graphics.Gpu.Shader
@@ -30,24 +30,20 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// </summary>
         /// <param name="code">Code to be dumped</param>
         /// <param name="compute">True for compute shader code, false for graphics shader code</param>
-        /// <param name="fullPath">Output path for the shader code with header included</param>
-        /// <param name="codePath">Output path for the shader code without header</param>
-        public void Dump(byte[] code, bool compute, out string fullPath, out string codePath)
+        /// <returns>Paths where the shader code was dumped</returns>
+        public ShaderDumpPaths Dump(byte[] code, bool compute)
         {
             _dumpPath = GraphicsConfig.ShadersDumpPath;
 
             if (string.IsNullOrWhiteSpace(_dumpPath))
             {
-                fullPath = null;
-                codePath = null;
-
-                return;
+                return default;
             }
 
             string fileName = "Shader" + CurrentDumpIndex.ToString("d4") + ".bin";
 
-            fullPath = Path.Combine(FullDir(), fileName);
-            codePath = Path.Combine(CodeDir(), fileName);
+            string fullPath = Path.Combine(FullDir(), fileName);
+            string codePath = Path.Combine(CodeDir(), fileName);
 
             CurrentDumpIndex++;
 
@@ -73,6 +69,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
             {
                 codeWriter.Write(0);
             }
+
+            return new ShaderDumpPaths(fullPath, codePath);
         }
 
         /// <summary>

+ 15 - 48
Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs

@@ -3,14 +3,12 @@ using Ryujinx.Graphics.Shader.StructuredIr;
 using Ryujinx.Graphics.Shader.Translation;
 using System;
 using System.Linq;
+using System.Numerics;
 
 namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
 {
     static class Declarations
     {
-        // At least 16 attributes are guaranteed by the spec.
-        public const int MaxAttributes = 16;
-
         public static void Declare(CodeGenContext context, StructuredProgramInfo info)
         {
             context.AppendLine("#version 450 core");
@@ -129,14 +127,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                     context.AppendLine();
                 }
 
-                if (info.IAttributes.Count != 0 || context.Config.GpPassthrough)
+                if (context.Config.UsedInputAttributes != 0 || context.Config.GpPassthrough)
                 {
                     DeclareInputAttributes(context, info);
 
                     context.AppendLine();
                 }
 
-                if (info.OAttributes.Count != 0 || context.Config.Stage != ShaderStage.Fragment)
+                if (context.Config.UsedOutputAttributes != 0 || context.Config.Stage != ShaderStage.Fragment)
                 {
                     DeclareOutputAttributes(context, info);
 
@@ -404,24 +402,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
 
         private static void DeclareInputAttributes(CodeGenContext context, StructuredProgramInfo info)
         {
-            if (context.Config.GpPassthrough)
+            int usedAttribtes = context.Config.UsedInputAttributes;
+            while (usedAttribtes != 0)
             {
-                for (int attr = 0; attr < MaxAttributes; attr++)
-                {
-                    DeclareInputAttribute(context, info, attr);
-                }
+                int index = BitOperations.TrailingZeroCount(usedAttribtes);
 
-                foreach (int attr in info.IAttributes.OrderBy(x => x).Where(x => x >= MaxAttributes))
-                {
-                    DeclareInputAttribute(context, info, attr);
-                }
-            }
-            else
-            {
-                foreach (int attr in info.IAttributes.OrderBy(x => x))
-                {
-                    DeclareInputAttribute(context, info, attr);
-                }
+                DeclareInputAttribute(context, info, index);
+
+                usedAttribtes &= ~(1 << index);
             }
         }
 
@@ -440,8 +428,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                 };
             }
 
-            string pass = context.Config.GpPassthrough && !info.OAttributes.Contains(attr) ? "passthrough, " : string.Empty;
-
+            string pass = (context.Config.PassthroughAttributes & (1 << attr)) != 0 ? "passthrough, " : string.Empty;
             string name = $"{DefaultNames.IAttributePrefix}{attr}";
 
             if ((context.Config.Options.Flags & TranslationFlags.Feedback) != 0)
@@ -461,34 +448,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
 
         private static void DeclareOutputAttributes(CodeGenContext context, StructuredProgramInfo info)
         {
-            if (context.Config.Stage == ShaderStage.Fragment || context.Config.GpPassthrough)
+            int usedAttribtes = context.Config.UsedOutputAttributes;
+            while (usedAttribtes != 0)
             {
-                DeclareUsedOutputAttributes(context, info);
-            }
-            else
-            {
-                DeclareAllOutputAttributes(context, info);
-            }
-        }
+                int index = BitOperations.TrailingZeroCount(usedAttribtes);
 
-        private static void DeclareUsedOutputAttributes(CodeGenContext context, StructuredProgramInfo info)
-        {
-            foreach (int attr in info.OAttributes.OrderBy(x => x))
-            {
-                DeclareOutputAttribute(context, attr);
-            }
-        }
-
-        private static void DeclareAllOutputAttributes(CodeGenContext context, StructuredProgramInfo info)
-        {
-            for (int attr = 0; attr < MaxAttributes; attr++)
-            {
-                DeclareOutputAttribute(context, attr);
-            }
+                DeclareOutputAttribute(context, index);
 
-            foreach (int attr in info.OAttributes.OrderBy(x => x).Where(x => x >= MaxAttributes))
-            {
-                DeclareOutputAttribute(context, attr);
+                usedAttribtes &= ~(1 << index);
             }
         }
 

+ 0 - 40
Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs

@@ -49,46 +49,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
 
             Declarations.DeclareLocals(context, function);
 
-            if (funcName == MainFunctionName)
-            {
-                // Some games will leave some elements of gl_Position uninitialized,
-                // in those cases, the elements will contain undefined values according
-                // to the spec, but on NVIDIA they seems to be always initialized to (0, 0, 0, 1),
-                // so we do explicit initialization to avoid UB on non-NVIDIA gpus.
-                if (context.Config.Stage == ShaderStage.Vertex)
-                {
-                    context.AppendLine("gl_Position = vec4(0.0, 0.0, 0.0, 1.0);");
-                }
-
-                // Ensure that unused attributes are set, otherwise the downstream
-                // compiler may eliminate them.
-                // (Not needed for fragment shader as it is the last stage).
-                if (context.Config.Stage != ShaderStage.Compute &&
-                    context.Config.Stage != ShaderStage.Fragment &&
-                    !context.Config.GpPassthrough)
-                {
-                    for (int attr = 0; attr < Declarations.MaxAttributes; attr++)
-                    {
-                        if (info.OAttributes.Contains(attr))
-                        {
-                            continue;
-                        }
-
-                        if ((context.Config.Options.Flags & TranslationFlags.Feedback) != 0)
-                        {
-                            context.AppendLine($"{DefaultNames.OAttributePrefix}{attr}_x = 0.0;");
-                            context.AppendLine($"{DefaultNames.OAttributePrefix}{attr}_y = 0.0;");
-                            context.AppendLine($"{DefaultNames.OAttributePrefix}{attr}_z = 0.0;");
-                            context.AppendLine($"{DefaultNames.OAttributePrefix}{attr}_w = 1.0;");
-                        }
-                        else
-                        {
-                            context.AppendLine($"{DefaultNames.OAttributePrefix}{attr} = vec4(0.0, 0.0, 0.0, 1.0);");
-                        }
-                    }
-                }
-            }
-
             PrintBlock(context, function.MainBlock);
 
             context.LeaveScope();

+ 34 - 15
Ryujinx.Graphics.Shader/Decoders/Decoder.cs

@@ -1,4 +1,5 @@
 using Ryujinx.Graphics.Shader.Instructions;
+using Ryujinx.Graphics.Shader.Translation;
 using System;
 using System.Collections.Generic;
 using System.Linq;
@@ -9,10 +10,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
 {
     static class Decoder
     {
-        public static Block[][] Decode(IGpuAccessor gpuAccessor, ulong startAddress, out bool hasBindless)
+        public static Block[][] Decode(ShaderConfig config, ulong startAddress)
         {
-            hasBindless = false;
-
             List<Block[]> funcs = new List<Block[]>();
 
             Queue<ulong> funcQueue = new Queue<ulong>();
@@ -90,8 +89,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
                             }
                         }
 
-                        FillBlock(gpuAccessor, currBlock, limitAddress, startAddress, out bool blockHasBindless);
-                        hasBindless |= blockHasBindless;
+                        FillBlock(config, currBlock, limitAddress, startAddress);
 
                         if (currBlock.OpCodes.Count != 0)
                         {
@@ -168,7 +166,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
                             for (int i = 0; i < cbOffsetsCount; i++)
                             {
-                                uint targetOffset = gpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4);
+                                uint targetOffset = config.GpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4);
                                 Block target = GetBlock(baseOffset + targetOffset);
                                 opBrIndir.PossibleTargets.Add(target);
                                 target.Predecessors.Add(block);
@@ -224,15 +222,11 @@ namespace Ryujinx.Graphics.Shader.Decoders
             return false;
         }
 
-        private static void FillBlock(
-            IGpuAccessor gpuAccessor,
-            Block        block,
-            ulong        limitAddress,
-            ulong        startAddress,
-            out bool     hasBindless)
+        private static void FillBlock(ShaderConfig config, Block block, ulong limitAddress, ulong startAddress)
         {
+            IGpuAccessor gpuAccessor = config.GpuAccessor;
+
             ulong address = block.Address;
-            hasBindless = false;
 
             do
             {
@@ -274,13 +268,38 @@ namespace Ryujinx.Graphics.Shader.Decoders
                 OpCode op = makeOp(emitter, opAddress, opCode);
 
                 // We check these patterns to figure out the presence of bindless access
-                hasBindless |= (op is OpCodeImage image && image.IsBindless) ||
+                if ((op is OpCodeImage image && image.IsBindless) ||
                     (op is OpCodeTxd txd && txd.IsBindless) ||
                     (op is OpCodeTld4B) ||
                     (emitter == InstEmit.TexB) ||
                     (emitter == InstEmit.TldB) ||
                     (emitter == InstEmit.TmmlB) ||
-                    (emitter == InstEmit.TxqB);
+                    (emitter == InstEmit.TxqB))
+                {
+                    config.SetUsedFeature(FeatureFlags.Bindless);
+                }
+
+                // Populate used attributes.
+                if (op is IOpCodeAttribute opAttr)
+                {
+                    for (int elemIndex = 0; elemIndex < opAttr.Count; elemIndex++)
+                    {
+                        int attr = opAttr.AttributeOffset + elemIndex * 4;
+                        if (attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd)
+                        {
+                            int index = (attr - AttributeConsts.UserAttributeBase) / 16;
+
+                            if (op.Emitter == InstEmit.Ast)
+                            {
+                                config.SetOutputUserAttribute(index);
+                            }
+                            else
+                            {
+                                config.SetInputUserAttribute(index);
+                            }
+                        }
+                    }
+                }
 
                 block.OpCodes.Add(op);
             }

+ 8 - 0
Ryujinx.Graphics.Shader/Decoders/IOpCodeAttribute.cs

@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    interface IOpCodeAttribute // Common view of opcodes that access shader attributes (implemented by OpCodeAttribute and OpCodeIpa).
+    {
+        int AttributeOffset { get; } // Offset of the first attribute element the opcode accesses.
+        int Count { get; } // Number of consecutive elements accessed; the decoder advances the offset by 4 per element.
+    }
+}

+ 1 - 1
Ryujinx.Graphics.Shader/Decoders/OpCodeAttribute.cs

@@ -2,7 +2,7 @@ using Ryujinx.Graphics.Shader.Instructions;
 
 namespace Ryujinx.Graphics.Shader.Decoders
 {
-    class OpCodeAttribute : OpCodeAluReg
+    class OpCodeAttribute : OpCodeAluReg, IOpCodeAttribute
     {
         public int AttributeOffset { get; }
         public int Count           { get; }

+ 2 - 1
Ryujinx.Graphics.Shader/Decoders/OpCodeIpa.cs

@@ -2,9 +2,10 @@ using Ryujinx.Graphics.Shader.Instructions;
 
 namespace Ryujinx.Graphics.Shader.Decoders
 {
-    class OpCodeIpa : OpCodeAluReg
+    class OpCodeIpa : OpCodeAluReg, IOpCodeAttribute
     {
         public int AttributeOffset { get; }
+        public int Count => 1;
 
         public InterpolationMode Mode { get; }
 

+ 0 - 35
Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs

@@ -277,21 +277,11 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
 
         public AstOperand GetOperandDef(Operand operand)
         {
-            if (TryGetUserAttributeIndex(operand, out int attrIndex))
-            {
-                Info.OAttributes.Add(attrIndex);
-            }
-
             return GetOperand(operand);
         }
 
         public AstOperand GetOperandUse(Operand operand)
         {
-            if (TryGetUserAttributeIndex(operand, out int attrIndex))
-            {
-                Info.IAttributes.Add(attrIndex);
-            }
-
             return GetOperand(operand);
         }
 
@@ -318,30 +308,5 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
 
             return astOperand;
         }
-
-        private static bool TryGetUserAttributeIndex(Operand operand, out int attrIndex)
-        {
-            if (operand.Type == OperandType.Attribute)
-            {
-                if (operand.Value >= AttributeConsts.UserAttributeBase &&
-                    operand.Value <  AttributeConsts.UserAttributeEnd)
-                {
-                    attrIndex = (operand.Value - AttributeConsts.UserAttributeBase) >> 4;
-
-                    return true;
-                }
-                else if (operand.Value >= AttributeConsts.FragmentOutputColorBase &&
-                         operand.Value <  AttributeConsts.FragmentOutputColorEnd)
-                {
-                    attrIndex = (operand.Value - AttributeConsts.FragmentOutputColorBase) >> 4;
-
-                    return true;
-                }
-            }
-
-            attrIndex = 0;
-
-            return false;
-        }
     }
 }

+ 0 - 6
Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs

@@ -6,17 +6,11 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
     {
         public List<StructuredFunction> Functions { get; }
 
-        public HashSet<int> IAttributes { get; }
-        public HashSet<int> OAttributes { get; }
-
         public HelperFunctionsMask HelperFunctionsMask { get; set; }
 
         public StructuredProgramInfo()
         {
             Functions = new List<StructuredFunction>();
-
-            IAttributes = new HashSet<int>();
-            OAttributes = new HashSet<int>();
         }
     }
 }

+ 3 - 0
Ryujinx.Graphics.Shader/Translation/EmitterContext.cs

@@ -15,6 +15,8 @@ namespace Ryujinx.Graphics.Shader.Translation
 
         public bool IsNonMain { get; }
 
+        public int OperationsCount => _operations.Count;
+
         private readonly IReadOnlyDictionary<ulong, int> _funcs;
         private readonly List<Operation> _operations;
         private readonly Dictionary<ulong, Operand> _labels;
@@ -200,6 +202,7 @@ namespace Ryujinx.Graphics.Shader.Translation
 
                     if (target.Enabled)
                     {
+                        Config.SetOutputUserAttribute(rtIndex);
                         regIndexBase += 4;
                     }
                 }

+ 28 - 0
Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs

@@ -41,6 +41,10 @@ namespace Ryujinx.Graphics.Shader.Translation
 
         private readonly TranslationCounts _counts;
 
+        public int UsedInputAttributes { get; private set; }
+        public int UsedOutputAttributes { get; private set; }
+        public int PassthroughAttributes { get; private set; }
+
         private int _usedConstantBuffers;
         private int _usedStorageBuffers;
         private int _usedStorageBuffersWrite;
@@ -170,6 +174,8 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             TextureHandlesForCache.UnionWith(other.TextureHandlesForCache);
 
+            UsedInputAttributes |= other.UsedInputAttributes;
+            UsedOutputAttributes |= other.UsedOutputAttributes;
             _usedConstantBuffers |= other._usedConstantBuffers;
             _usedStorageBuffers |= other._usedStorageBuffers;
             _usedStorageBuffersWrite |= other._usedStorageBuffersWrite;
@@ -191,6 +197,28 @@ namespace Ryujinx.Graphics.Shader.Translation
             }
         }
 
+        public void SetInputUserAttribute(int index) // Flags user attribute "index" as used as an input.
+        {
+            UsedInputAttributes |= 1 << index; // The mask holds one bit per attribute index.
+        }
+
+        public void SetOutputUserAttribute(int index) // Flags user attribute "index" as used as an output.
+        {
+            UsedOutputAttributes |= 1 << index; // The mask holds one bit per attribute index.
+        }
+
+        public void MergeOutputUserAttributes(int mask) // Folds a bit mask of user attributes (one bit per index) into this config.
+        {
+            if (GpPassthrough)
+            {
+                PassthroughAttributes = mask & ~UsedOutputAttributes; // Only bits not already present in UsedOutputAttributes are recorded as passthrough.
+            }
+            else
+            {
+                UsedOutputAttributes |= mask; // Without passthrough, every bit of the mask becomes a used output.
+            }
+        }
+
         public void SetClipDistanceWritten(int index)
         {
             ClipDistancesWritten |= (byte)(1 << index);

+ 46 - 11
Ryujinx.Graphics.Shader/Translation/Translator.cs

@@ -5,6 +5,7 @@ using Ryujinx.Graphics.Shader.StructuredIr;
 using Ryujinx.Graphics.Shader.Translation.Optimizations;
 using System;
 using System.Collections.Generic;
+using System.Numerics;
 
 using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
 
@@ -120,24 +121,17 @@ namespace Ryujinx.Graphics.Shader.Translation
             Block[][] cfg;
             ulong maxEndAddress = 0;
 
-            bool hasBindless;
-
             if ((options.Flags & TranslationFlags.Compute) != 0)
             {
                 config = new ShaderConfig(gpuAccessor, options, counts);
 
-                cfg = Decoder.Decode(gpuAccessor, address, out hasBindless);
+                cfg = Decoder.Decode(config, address);
             }
             else
             {
                 config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options, counts);
 
-                cfg = Decoder.Decode(gpuAccessor, address + HeaderSize, out hasBindless);
-            }
-
-            if (hasBindless)
-            {
-                config.SetUsedFeature(FeatureFlags.Bindless);
+                cfg = Decoder.Decode(config, address + HeaderSize);
             }
 
             for (int funcIndex = 0; funcIndex < cfg.Length; funcIndex++)
@@ -151,7 +145,7 @@ namespace Ryujinx.Graphics.Shader.Translation
                         maxEndAddress = block.EndAddress;
                     }
 
-                    if (!hasBindless)
+                    if (!config.UsedFeatures.HasFlag(FeatureFlags.Bindless))
                     {
                         for (int index = 0; index < block.OpCodes.Count; index++)
                         {
@@ -169,8 +163,10 @@ namespace Ryujinx.Graphics.Shader.Translation
             return cfg;
         }
 
-        internal static FunctionCode[] EmitShader(Block[][] cfg, ShaderConfig config)
+        internal static FunctionCode[] EmitShader(Block[][] cfg, ShaderConfig config, bool initializeOutputs, out int initializationOperations)
         {
+            initializationOperations = 0;
+
             Dictionary<ulong, int> funcIds = new Dictionary<ulong, int>();
 
             for (int funcIndex = 0; funcIndex < cfg.Length; funcIndex++)
@@ -184,6 +180,12 @@ namespace Ryujinx.Graphics.Shader.Translation
             {
                 EmitterContext context = new EmitterContext(config, funcIndex != 0, funcIds);
 
+                if (initializeOutputs && funcIndex == 0)
+                {
+                    EmitOutputsInitialization(context, config);
+                    initializationOperations = context.OperationsCount;
+                }
+
                 for (int blkIndex = 0; blkIndex < cfg[funcIndex].Length; blkIndex++)
                 {
                     Block block = cfg[funcIndex][blkIndex];
@@ -201,6 +203,39 @@ namespace Ryujinx.Graphics.Shader.Translation
             return funcs.ToArray();
         }
 
+        private static void EmitOutputsInitialization(EmitterContext context, ShaderConfig config) // Emits copies that give the stage's output attributes default values.
+        {
+            // Compute has no output attributes, and fragment is the last stage, so we
+            // don't need to initialize outputs on those stages.
+            if (config.Stage == ShaderStage.Compute || config.Stage == ShaderStage.Fragment)
+            {
+                return;
+            }
+
+            void InitializeOutput(int baseAttr) // Writes (0, 0, 0, 1) to the four components starting at baseAttr.
+            {
+                for (int c = 0; c < 4; c++)
+                {
+                    context.Copy(Attribute(baseAttr + c * 4), ConstF(c == 3 ? 1f : 0f));
+                }
+            }
+
+            if (config.Stage == ShaderStage.Vertex)
+            {
+                InitializeOutput(AttributeConsts.PositionX);
+            }
+
+            int usedAttributes = context.Config.UsedOutputAttributes;
+            while (usedAttributes != 0) // Visit every set bit of the mask, lowest index first.
+            {
+                int index = BitOperations.TrailingZeroCount(usedAttributes);
+
+                InitializeOutput(AttributeConsts.UserAttributeBase + index * 16);
+
+                usedAttributes &= ~(1 << index); // Clear the bit that was just handled.
+            }
+        }
+
         private static void EmitOps(EmitterContext context, Block block)
         {
             for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)

+ 28 - 6
Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs

@@ -38,7 +38,7 @@ namespace Ryujinx.Graphics.Shader.Translation
                    operand.Value < AttributeConsts.UserAttributeEnd;
         }
 
-        private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b)
+        private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b, int aStart)
         {
             // Here we combine two shaders.
             // For shader A:
@@ -57,7 +57,7 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             Operand lblB = Label();
 
-            for (int index = 0; index < a[0].Code.Length; index++)
+            for (int index = aStart; index < a[0].Code.Length; index++)
             {
                 Operation operation = a[0].Code[index];
 
@@ -103,7 +103,17 @@ namespace Ryujinx.Graphics.Shader.Translation
 
                         if (temp != null)
                         {
-                            operation.SetSource(srcIndex, temp);
+                            // TODO: LoadAttribute should accept any integer value as first argument,
+                            // then we don't need special case here. Right now it expects the first
+                            // operand to be of type "attribute".
+                            if ((operation.Inst & Instruction.Mask) == Instruction.LoadAttribute)
+                            {
+                                operation.TurnIntoCopy(temp);
+                            }
+                            else
+                            {
+                                operation.SetSource(srcIndex, temp);
+                            }
                         }
                     }
                 }
@@ -126,13 +136,25 @@ namespace Ryujinx.Graphics.Shader.Translation
             return output;
         }
 
-        public ShaderProgram Translate(out ShaderProgramInfo shaderProgramInfo, TranslatorContext other = null)
+        public ShaderProgram Translate(
+            out ShaderProgramInfo shaderProgramInfo,
+            TranslatorContext nextStage = null,
+            TranslatorContext other = null)
         {
-            FunctionCode[] code = EmitShader(_cfg, _config);
+            if (nextStage != null)
+            {
+                _config.MergeOutputUserAttributes(nextStage._config.UsedInputAttributes);
+            }
+
+            FunctionCode[] code = EmitShader(_cfg, _config, initializeOutputs: other == null, out _);
 
             if (other != null)
             {
-                code = Combine(EmitShader(other._cfg, other._config), code);
+                other._config.MergeOutputUserAttributes(_config.UsedOutputAttributes);
+
+                FunctionCode[] otherCode = EmitShader(other._cfg, other._config, initializeOutputs: true, out int aStart);
+
+                code = Combine(otherCode, code, aStart);
 
                 _config.InheritFrom(other._config);
             }