Optimizer.cs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Linq;
  5. namespace Ryujinx.Graphics.Shader.Translation.Optimizations
  6. {
  7. static class Optimizer
  8. {
  /// <summary>
  /// Runs the optimization passes on all blocks of the context, then the one-shot
  /// pattern-matching passes, and finally the optimization passes once more.
  /// </summary>
  /// <param name="context">Context supplying the blocks, resource manager, GPU accessor, stage and target language</param>
  public static void RunPass(TransformContext context)
  {
      BasicBlock[] blocks = context.Blocks;

      RunOptimizationPasses(blocks, context.ResourceManager);

      // TODO: Some of those are not optimizations and shouldn't be here.
      GlobalToStorage.RunPass(context.Hfm, blocks, context.ResourceManager, context.GpuAccessor, context.TargetLanguage);

      // When the host lacks double support, double operations must be turned into float operations.
      bool lowerDoubles = !context.GpuAccessor.QueryHostSupportsShaderFloat64();

      // FragmentCoord only exists on fragment shaders, other stages don't need that pass.
      bool isFragmentStage = context.Stage == ShaderStage.Fragment;

      // The passes below look for specific patterns, so a single run per block is enough.
      foreach (BasicBlock block in blocks)
      {
          BindlessToIndexed.RunPass(block, context.ResourceManager);
          BindlessElimination.RunPass(block, context.ResourceManager, context.GpuAccessor);

          if (isFragmentStage)
          {
              EliminateMultiplyByFragmentCoordW(block);
          }

          if (lowerDoubles)
          {
              DoubleToFloat.RunPass(context.Hfm, block);
          }
      }

      // Optimize one final time, the passes above may have exposed new optimizable code.
      RunOptimizationPasses(blocks, context.ResourceManager);
  }
  /// <summary>
  /// Sweeps over every node of every block applying the per-node optimizations,
  /// and repeats the whole sweep until one of them completes without changing anything.
  /// </summary>
  /// <param name="blocks">Blocks whose operation lists are optimized in place</param>
  /// <param name="resourceManager">Resource manager forwarded to the constant folding pass</param>
  private static void RunOptimizationPasses(BasicBlock[] blocks, ResourceManager resourceManager)
  {
      bool changed;

      do
      {
          changed = false;

          foreach (BasicBlock block in blocks)
          {
              LinkedListNode<INode> following;

              for (LinkedListNode<INode> current = block.Operations.First; current != null; current = following)
              {
                  // Fetch the successor before anything else, as "current" may be removed from the list below.
                  following = current.Next;

                  INode value = current.Value;
                  bool dead = IsUnused(value);

                  if (dead || value is not Operation op)
                  {
                      // A phi that is still in use can be dropped when its value could be propagated.
                      if (!dead && value is PhiNode phi)
                      {
                          dead = PropagatePhi(phi);
                      }

                      if (dead)
                      {
                          RemoveNode(block, current);
                          changed = true;
                      }

                      continue;
                  }

                  ConstantFolding.RunPass(resourceManager, op);
                  Simplification.RunPass(op);

                  // The propagation passes below only apply when the result goes to local variables.
                  if (!DestIsLocalVar(op))
                  {
                      continue;
                  }

                  if (op.Inst == Instruction.Copy)
                  {
                      // After forwarding the source to all uses, the copy itself is no longer needed.
                      PropagateCopy(op);
                      RemoveNode(block, current);
                      changed = true;

                      continue;
                  }

                  bool rewroteUses = op.Inst == Instruction.PackHalf2x16 && PropagatePack(op);

                  if (!rewroteUses)
                  {
                      rewroteUses = op.Inst == Instruction.ShuffleXor && MatchDdxOrDdy(op);
                  }

                  if (rewroteUses)
                  {
                      // Some uses may not have matched, so only remove the operation if none remain.
                      if (DestHasNoUses(op))
                      {
                          RemoveNode(block, current);
                      }

                      changed = true;
                  }
              }

              // When branch elimination succeeds, the last node of the block is removed.
              if (BranchElimination.RunPass(block))
              {
                  RemoveNode(block, block.Operations.Last);
                  changed = true;
              }
          }
      }
      while (changed);
  }
  /// <summary>
  /// Replaces every use of the destination of a copy with the source of that copy.
  /// </summary>
  /// <param name="copyOp">Copy operation whose source should be forwarded to the uses of its destination</param>
  private static void PropagateCopy(Operation copyOp)
  {
      Operand replaced = copyOp.Dest;
      Operand replacement = copyOp.GetSource(0);

      // Walk a snapshot of the use list rather than the list itself
      // (presumably SetSource updates the use list of the operands involved).
      foreach (INode user in replaced.UseOps.ToArray())
      {
          for (int srcIndex = 0; srcIndex < user.SourcesCount; srcIndex++)
          {
              if (user.GetSource(srcIndex) != replaced)
              {
                  continue;
              }

              user.SetSource(srcIndex, replacement);
          }
      }
  }
  /// <summary>
  /// Forwards the value of a phi node to all its uses when all the phi sources are the same operand value.
  /// </summary>
  /// <param name="phi">Phi node to check</param>
  /// <returns>True if the value was propagated to all uses of the phi destination, false if the sources differ</returns>
  private static bool PropagatePhi(PhiNode phi)
  {
      Operand candidate = phi.GetSource(0);

      // The phi is only redundant when every other source matches the first one.
      int srcIndex = 1;

      while (srcIndex < phi.SourcesCount)
      {
          if (!IsSameOperand(candidate, phi.GetSource(srcIndex)))
          {
              return false;
          }

          srcIndex++;
      }

      // All sources match, so every use of the phi result can read the common value directly.
      Operand phiResult = phi.Dest;

      foreach (INode user in phiResult.UseOps.ToArray())
      {
          for (int index = 0; index < user.SourcesCount; index++)
          {
              if (user.GetSource(index) != phiResult)
              {
                  continue;
              }

              user.SetSource(index, candidate);
          }
      }

      return true;
  }
  /// <summary>
  /// Checks if two operands have the same type and value, and are either constants or constant buffer operands.
  /// </summary>
  /// <param name="x">First operand to compare</param>
  /// <param name="y">Second operand to compare</param>
  /// <returns>True if both operands match and are of a supported type, false otherwise</returns>
  private static bool IsSameOperand(Operand x, Operand y)
  {
      // TODO: Handle Load operations with the same storage and the same constant parameters.
      return x.Type == y.Type &&
             x.Value == y.Value &&
             (x.Type is OperandType.Constant or OperandType.ConstantBuffer);
  }
  /// <summary>
  /// Turns the UnpackHalf2x16 operations that read the result of a pack operation into
  /// copies of the matching pack source.
  /// </summary>
  /// <param name="packOp">Pack operation whose sources should be forwarded</param>
  /// <returns>True if at least one unpack operation was turned into a copy, false otherwise</returns>
  private static bool PropagatePack(Operation packOp)
  {
      Operand packed = packOp.Dest;
      Operand firstSrc = packOp.GetSource(0);
      Operand secondSrc = packOp.GetSource(1);

      bool replacedAny = false;

      foreach (INode user in packed.UseOps.ToArray())
      {
          // Only unpack operations whose first source is the pack result are of interest.
          if (user is Operation { Inst: Instruction.UnpackHalf2x16 } unpackOp && unpackOp.GetSource(0) == packed)
          {
              // The index of the unpack operation selects which pack source it extracts.
              Operand forwarded;

              if (unpackOp.Index == 1)
              {
                  forwarded = secondSrc;
              }
              else
              {
                  forwarded = firstSrc;
              }

              unpackOp.TurnIntoCopy(forwarded);
              replacedAny = true;
          }
      }

      return replacedAny;
  }
  /// <summary>
  /// Looks for SwizzleAdd operations that consume the result of a ShuffleXor operation
  /// in the shape of a derivative calculation, and turns them into Ddx or Ddy operations.
  /// </summary>
  /// <param name="operation">ShuffleXor operation whose uses should be checked</param>
  /// <returns>True if at least one use was turned into a Ddx or Ddy operation, false otherwise</returns>
  public static bool MatchDdxOrDdy(Operation operation)
  {
      // It's assumed that "operation.Inst" is ShuffleXor,
      // that should be checked before calling this method.
      Debug.Assert(operation.Inst == Instruction.ShuffleXor);

      bool modified = false;

      Operand src2 = operation.GetSource(1);
      Operand src3 = operation.GetSource(2);

      if (src2.Type != OperandType.Constant || (src2.Value != 1 && src2.Value != 2))
      {
          return false;
      }

      if (src3.Type != OperandType.Constant || src3.Value != 0x1c03)
      {
          return false;
      }

      // The second source was validated to be either 1 or 2 above: 2 selects DDY, 1 selects DDX.
      bool isDdx = src2.Value != 2;

      // We can replace any use by a FSWZADD with DDX/DDY, when
      // the following conditions are true:
      // - The mask should be 0b10100101 for DDY, or 0b10011001 for DDX.
      // - The first source operand must be the shuffle output.
      // - The second source operand must be the shuffle first source operand.
      int expectedMask = isDdx ? 0b10011001 : 0b10100101;

      INode[] uses = operation.Dest.UseOps.ToArray();

      foreach (INode use in uses)
      {
          if (use is not Operation useOp || useOp.Inst != Instruction.SwizzleAdd)
          {
              continue;
          }

          Operand fswzaddSrc1 = useOp.GetSource(0);
          Operand fswzaddSrc2 = useOp.GetSource(1);
          Operand fswzaddSrc3 = useOp.GetSource(2);

          if (fswzaddSrc1 != operation.Dest)
          {
              continue;
          }

          if (fswzaddSrc2 != operation.GetSource(0))
          {
              continue;
          }

          if (fswzaddSrc3.Type != OperandType.Constant || fswzaddSrc3.Value != expectedMask)
          {
              continue;
          }

          useOp.TurnInto(isDdx ? Instruction.Ddx : Instruction.Ddy, fswzaddSrc2);
          modified = true;
      }

      return modified;
  }
  /// <summary>
  /// Runs the gl_FragCoord.w multiplication elimination on every operation of a block.
  /// </summary>
  /// <param name="block">Block whose operations should be checked</param>
  private static void EliminateMultiplyByFragmentCoordW(BasicBlock block)
  {
      // Nodes that are not operations are skipped.
      foreach (Operation candidate in block.Operations.OfType<Operation>())
      {
          EliminateMultiplyByFragmentCoordW(candidate);
      }
  }
  /// <summary>
  /// Turns an operation into a copy of an input load result, if it matches the pattern
  /// "(x * gl_FragCoord.w) * (1.0 / gl_FragCoord.w)".
  /// </summary>
  /// <param name="operation">Operation to check and, on a match, turn into a copy</param>
  private static void EliminateMultiplyByFragmentCoordW(Operation operation)
  {
      // We're looking for the pattern:
      //  y = x * gl_FragCoord.w
      //  v = y * (1.0 / gl_FragCoord.w)
      // Then we transform it into:
      //  v = x
      // This pattern is common on fragment shaders due to the way perspective correction is done.
      const Instruction Fp32Multiply = Instruction.FP32 | Instruction.Multiply;
      const Instruction Fp32Divide = Instruction.FP32 | Instruction.Divide;

      // The operation itself must be the multiplication by the reciprocal of gl_FragCoord.w.
      if (operation.Inst != Fp32Multiply)
      {
          return;
      }

      Operand scaledInput = operation.GetSource(0);
      Operand reciprocal = operation.GetSource(1);

      // The first source of the main multiplication must itself be produced by a multiplication.
      if (scaledInput.AsgOp is not Operation { Inst: Fp32Multiply } attrMulOp)
      {
          return;
      }

      Operand attrMulLhs = attrMulOp.GetSource(0);
      Operand attrMulRhs = attrMulOp.GetSource(1);

      // That inner multiplication must be "any input" times "exactly gl_FragCoord.w".
      bool isInputTimesW =
          Utils.IsInputLoad(attrMulLhs.AsgOp) &&
          Utils.IsInputLoad(attrMulRhs.AsgOp, IoVariable.FragmentCoord, 3);

      if (!isInputTimesW)
      {
          return;
      }

      // The second source of the main multiplication must be produced by a division (1.0 / x).
      if (reciprocal.AsgOp is not Operation { Inst: Fp32Divide } reciprocalOp)
      {
          return;
      }

      Operand dividend = reciprocalOp.GetSource(0);
      Operand divisor = reciprocalOp.GetSource(1);

      // The dividend must be the constant 1.0, and the divisor must be gl_FragCoord.w.
      bool isOneOverW =
          dividend.Type == OperandType.Constant &&
          dividend.AsFloat() == 1.0f &&
          Utils.IsInputLoad(divisor.AsgOp, IoVariable.FragmentCoord, 3);

      if (!isOneOverW)
      {
          return;
      }

      // Everything matched, the two multiplications cancel out and only the input load result is left.
      operation.TurnIntoCopy(attrMulLhs);
  }
  /// <summary>
  /// Removes a node from the operation list of a block, and removes it from the
  /// use lists of the local variables that it reads.
  /// </summary>
  /// <param name="block">Block that contains the node</param>
  /// <param name="llNode">Linked list node to be removed from the block</param>
  private static void RemoveNode(BasicBlock block, LinkedListNode<INode> llNode)
  {
      block.Operations.Remove(llNode);

      Queue<INode> pending = new();

      pending.Enqueue(llNode.Value);

      while (pending.Count != 0)
      {
          INode current = pending.Dequeue();

          for (int srcIndex = 0; srcIndex < current.SourcesCount; srcIndex++)
          {
              Operand source = current.GetSource(srcIndex);

              // Only local variables are processed here.
              if (source.Type == OperandType.LocalVariable)
              {
                  bool wasLastUse = source.UseOps.Remove(current) && source.UseOps.Count == 0;

                  if (wasLastUse)
                  {
                      // The variable has no uses left, so the node that assigns it gets the same
                      // use list cleanup. Only "llNode" is removed from the block by this method.
                      Debug.Assert(source.AsgOp != null);
                      pending.Enqueue(source.AsgOp);
                  }
              }
          }
      }
  }
  /// <summary>
  /// Checks if a node has no side effects and only writes local variables that have no uses.
  /// </summary>
  /// <param name="node">Node to check</param>
  /// <returns>True if the node is unused, false otherwise</returns>
  private static bool IsUnused(INode node)
  {
      // Nodes with side effects are never considered unused, even if their results are not read.
      if (HasSideEffects(node))
      {
          return false;
      }

      return DestIsLocalVar(node) && DestHasNoUses(node);
  }
  /// <summary>
  /// Checks if a node is an atomic, image atomic or call operation.
  /// </summary>
  /// <param name="node">Node to check</param>
  /// <returns>True if the node is one of the operations treated as having side effects, false otherwise</returns>
  private static bool HasSideEffects(INode node)
  {
      if (node is not Operation operation)
      {
          return false;
      }

      // Only the bits selected by the instruction mask are compared.
      return (operation.Inst & Instruction.Mask) is
          Instruction.AtomicAdd or
          Instruction.AtomicAnd or
          Instruction.AtomicCompareAndSwap or
          Instruction.AtomicMaxS32 or
          Instruction.AtomicMaxU32 or
          Instruction.AtomicMinS32 or
          Instruction.AtomicMinU32 or
          Instruction.AtomicOr or
          Instruction.AtomicSwap or
          Instruction.AtomicXor or
          Instruction.Call or
          Instruction.ImageAtomic;
  }
  /// <summary>
  /// Checks if a node has at least one destination slot, and all its non-null destinations are local variables.
  /// </summary>
  /// <param name="node">Node to check</param>
  /// <returns>True if the node only writes to local variables, false otherwise (or if it has no destinations)</returns>
  private static bool DestIsLocalVar(INode node)
  {
      int destsCount = node.DestsCount;

      // A node without destinations does not qualify.
      bool onlyLocals = destsCount != 0;

      for (int destIndex = 0; onlyLocals && destIndex < destsCount; destIndex++)
      {
          Operand target = node.GetDest(destIndex);

          // Null destinations are ignored.
          onlyLocals = target == null || target.Type == OperandType.LocalVariable;
      }

      return onlyLocals;
  }
  /// <summary>
  /// Checks if none of the non-null destinations of a node has any use.
  /// </summary>
  /// <param name="node">Node to check</param>
  /// <returns>True if no destination of the node is used, false otherwise</returns>
  private static bool DestHasNoUses(INode node)
  {
      int destIndex = 0;

      while (destIndex < node.DestsCount)
      {
          Operand target = node.GetDest(destIndex++);

          // Null destinations are ignored.
          if (target == null)
          {
              continue;
          }

          if (target.UseOps.Count != 0)
          {
              return false;
          }
      }

      return true;
  }
  365. }
  366. }