// GlobalToStorage.cs (8.1 KB)
  1. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  2. using System.Collections.Generic;
  3. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  4. using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
  5. namespace Ryujinx.Graphics.Shader.Translation.Optimizations
  6. {
  /// <summary>
  /// Optimization pass that rewrites global memory operations into storage buffer
  /// (or constant buffer) operations when the base address of the access can be
  /// traced back to a known range of constant buffer 0.
  /// </summary>
  static class GlobalToStorage
  {
      /// <summary>
      /// Walks every operation of <paramref name="block"/> and replaces the global memory
      /// accesses whose address derives from a recognized constant buffer 0 offset.
      /// </summary>
      /// <param name="block">Basic block whose operation list is scanned and modified in place</param>
      /// <param name="config">Shader configuration, used for the stage and the GPU accessor queries</param>
      public static void RunPass(BasicBlock block, ShaderConfig config)
      {
          int sbStart = GetStorageBaseCbOffset(config.Stage);
          int sbEnd = sbStart + StorageDescsSize;

          for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
          {
              if (!(node.Value is Operation operation))
              {
                  continue;
              }

              if (!UsesGlobalMemory(operation.Inst))
              {
                  continue;
              }

              // Only addresses that are produced by another operation can be traced back.
              if (!(operation.GetSource(0).AsgOp is Operation asgOperation))
              {
                  continue;
              }

              int storageIndex = SearchForStorageBase(asgOperation, sbStart, sbEnd);

              if (storageIndex >= 0)
              {
                  // Storage buffers are implemented using global memory access.
                  // If we know from where the base address of the access is loaded,
                  // we can guess which storage buffer it is accessing.
                  // We can then replace the global memory access with a storage
                  // buffer access.
                  node = ReplaceGlobalWithStorage(node, config, storageIndex);
              }
              else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
              {
                  // Here we effectively try to replace a LDG instruction with LDC.
                  // The hardware only supports a limited amount of constant buffers
                  // so NVN "emulates" more constant buffers using global memory access.
                  // Here we try to replace the global access back to a constant buffer
                  // load.
                  storageIndex = SearchForStorageBase(asgOperation, UbeBaseOffset, UbeBaseOffset + UbeDescsSize);

                  if (storageIndex >= 0)
                  {
                      node = ReplaceLdgWithLdc(node, config, storageIndex);
                  }
              }
          }
      }

      /// <summary>
      /// Replaces the global memory operation held by <paramref name="node"/> with the equivalent
      /// storage operation (atomic, load or store) targeting storage buffer <paramref name="storageIndex"/>.
      /// </summary>
      /// <param name="node">List node holding the global memory operation</param>
      /// <param name="config">Shader configuration</param>
      /// <param name="storageIndex">Index of the storage buffer being accessed</param>
      /// <returns>The list node holding the newly inserted storage operation</returns>
      private static LinkedListNode<INode> ReplaceGlobalWithStorage(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
      {
          Operation operation = (Operation)node.Value;

          Operand bufferIndex = Const(storageIndex);
          Operand wordOffset = EmitWordOffset(
              node,
              config,
              operation.GetSource(0),
              GetStorageCbOffset(config.Stage, storageIndex));

          Operand[] sources = BuildSources(operation, bufferIndex, wordOffset);

          Operation storageOp;

          if (operation.Inst.IsAtomic())
          {
              // Keep the atomic operation kind, only switch its memory region to storage.
              Instruction inst = (operation.Inst & ~Instruction.MrMask) | Instruction.MrStorage;

              storageOp = new Operation(inst, operation.Dest, sources);
          }
          else if (operation.Inst == Instruction.LoadGlobal)
          {
              storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
          }
          else
          {
              storageOp = new Operation(Instruction.StoreStorage, null, sources);
          }

          return SwapOperation(node, storageOp);
      }

      /// <summary>
      /// Replaces the global memory load held by <paramref name="node"/> with a constant buffer load.
      /// </summary>
      /// <param name="node">List node holding the global memory load</param>
      /// <param name="config">Shader configuration</param>
      /// <param name="storageIndex">Index of the descriptor, relative to <c>UbeBaseOffset</c>, the address was loaded from</param>
      /// <returns>The list node holding the newly inserted constant buffer load</returns>
      private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
      {
          Operation operation = (Operation)node.Value;

          Operand bufferIndex = Const(UbeFirstCbuf + storageIndex);
          Operand wordOffset = EmitWordOffset(
              node,
              config,
              operation.GetSource(0),
              UbeBaseOffset + storageIndex * StorageDescSize);

          Operand[] sources = BuildSources(operation, bufferIndex, wordOffset);

          Operation ldcOp = new Operation(Instruction.LoadConstant, operation.Dest, sources);

          return SwapOperation(node, ldcOp);
      }

      /// <summary>
      /// Inserts, right before <paramref name="node"/>, the operations that turn the low address
      /// word of a global access into a word offset relative to the buffer base address.
      /// </summary>
      /// <param name="node">List node the new operations are inserted before</param>
      /// <param name="config">Shader configuration, queried for the storage buffer offset alignment</param>
      /// <param name="addrLow">Low word of the accessed address</param>
      /// <param name="baseCbOffset">Offset on constant buffer 0 where the low word of the base address is located</param>
      /// <returns>Local operand that receives the word offset</returns>
      private static Operand EmitWordOffset(LinkedListNode<INode> node, ShaderConfig config, Operand addrLow, int baseCbOffset)
      {
          LinkedList<INode> list = node.List;

          Operand baseAddrLow = Cbuf(0, baseCbOffset);
          Operand baseAddrTrunc = Local();

          // Negating the alignment yields a mask that clears the low bits of the base address
          // (this assumes that the alignment reported by the GPU accessor is a power of two).
          Operand alignMask = Const(-config.GpuAccessor.QueryStorageBufferOffsetAlignment());

          list.AddBefore(node, new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask));

          Operand byteOffset = Local();
          Operand wordOffset = Local();

          Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);

          // Shifting right by 2 converts the byte offset into an offset in 32-bit words.
          Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));

          list.AddBefore(node, subOp);
          list.AddBefore(node, shrOp);

          return wordOffset;
      }

      /// <summary>
      /// Builds the source list of a replacement operation: the first two sources are replaced
      /// by the buffer index and word offset, all remaining sources are copied unchanged.
      /// </summary>
      /// <param name="operation">Operation being replaced</param>
      /// <param name="bufferIndex">New first source</param>
      /// <param name="wordOffset">New second source</param>
      /// <returns>Array with as many sources as <paramref name="operation"/> has</returns>
      private static Operand[] BuildSources(Operation operation, Operand bufferIndex, Operand wordOffset)
      {
          Operand[] sources = new Operand[operation.SourcesCount];

          sources[0] = bufferIndex;
          sources[1] = wordOffset;

          for (int index = 2; index < sources.Length; index++)
          {
              sources[index] = operation.GetSource(index);
          }

          return sources;
      }

      /// <summary>
      /// Clears all sources of the operation held by <paramref name="node"/>, inserts
      /// <paramref name="newOperation"/> at the same position and removes the old node from the list.
      /// </summary>
      /// <param name="node">List node holding the operation to be replaced</param>
      /// <param name="newOperation">Operation that takes its place</param>
      /// <returns>The list node holding <paramref name="newOperation"/></returns>
      private static LinkedListNode<INode> SwapOperation(LinkedListNode<INode> node, Operation newOperation)
      {
          Operation oldOperation = (Operation)node.Value;

          // Detach the old operation from its sources before it is dropped from the list.
          for (int index = 0; index < oldOperation.SourcesCount; index++)
          {
              oldOperation.SetSource(index, null);
          }

          LinkedList<INode> list = node.List;

          LinkedListNode<INode> newNode = list.AddBefore(node, newOperation);

          list.Remove(node);

          return newNode;
      }

      /// <summary>
      /// Searches, breadth first, the operations that contribute to the result of
      /// <paramref name="operation"/> for a source located on constant buffer 0 with an
      /// offset inside the range [<paramref name="sbStart"/>, <paramref name="sbEnd"/>).
      /// </summary>
      /// <param name="operation">Operation where the search starts</param>
      /// <param name="sbStart">First constant buffer offset of the range (inclusive)</param>
      /// <param name="sbEnd">Last constant buffer offset of the range (exclusive)</param>
      /// <returns>
      /// Offset of the first match relative to <paramref name="sbStart"/>, divided by
      /// <c>StorageDescSize</c>, or -1 if no source inside the range was found
      /// </returns>
      private static int SearchForStorageBase(Operation operation, int sbStart, int sbEnd)
      {
          Queue<Operation> assignments = new Queue<Operation>();

          // Operations reachable through more than one operand must only be expanded once,
          // otherwise shared sub-expressions are walked repeatedly (and a cyclic chain would
          // never terminate). The order of first visits, and so the result, is not affected.
          // NOTE(review): relies on the default equality of Operation being reference equality.
          HashSet<Operation> visited = new HashSet<Operation>();

          assignments.Enqueue(operation);
          visited.Add(operation);

          while (assignments.TryDequeue(out operation))
          {
              for (int index = 0; index < operation.SourcesCount; index++)
              {
                  Operand source = operation.GetSource(index);

                  if (source.Type == OperandType.ConstantBuffer)
                  {
                      int slot = source.GetCbufSlot();
                      int offset = source.GetCbufOffset();

                      if (slot == 0 && offset >= sbStart && offset < sbEnd)
                      {
                          return (offset - sbStart) / StorageDescSize;
                      }
                  }

                  if (source.AsgOp is Operation asgOperation && visited.Add(asgOperation))
                  {
                      assignments.Enqueue(asgOperation);
                  }
              }
          }

          return -1;
      }
  }
  164. }