Decoder.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516
  1. using Ryujinx.Graphics.Shader.Instructions;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Linq;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. namespace Ryujinx.Graphics.Shader.Decoders
  7. {
  8. static class Decoder
  9. {
  /// <summary>
  /// Instruction word that, when found right after a block, is treated as the end of the shader code
  /// (see <see cref="HasBlockAfter"/>).
  /// </summary>
  public const ulong ShaderEndDelimiter = 0xe2400fffff87000f;

  /// <summary>
  /// Decodes the shader code into basic blocks, grouped by function.
  /// </summary>
  /// <remarks>
  /// Block and function addresses are offsets; <paramref name="startAddress"/> is added to them
  /// whenever shader memory is read. Decoding starts at offset 0, and every target of a CAL
  /// instruction found along the way is decoded as an additional function.
  /// </remarks>
  /// <param name="gpuAccessor">Accessor used to read the shader code from memory</param>
  /// <param name="startAddress">Base address of the shader code</param>
  /// <param name="hasBindless">Set to true if any decoded instruction matches a bindless access pattern</param>
  /// <returns>One array of blocks (sorted by address) for each decoded function, entry function first</returns>
  /// <exception cref="InvalidOperationException">A block with the same address was already on the list</exception>
  public static Block[][] Decode(IGpuAccessor gpuAccessor, ulong startAddress, out bool hasBindless)
  {
      hasBindless = false;

      List<Block[]> funcs = new List<Block[]>();
      Queue<ulong> funcQueue = new Queue<ulong>();
      HashSet<ulong> funcVisited = new HashSet<ulong>();

      // Queues a function for decoding the first time its address is seen.
      void EnqueueFunction(ulong funcAddress)
      {
          if (funcVisited.Add(funcAddress))
          {
              funcQueue.Enqueue(funcAddress);
          }
      }

      // The entry function starts at offset 0. It must go through EnqueueFunction so that
      // it is marked as visited, otherwise a CAL targeting offset 0 would queue it again
      // and the entry function would be decoded (and returned) twice.
      EnqueueFunction(0);

      while (funcQueue.TryDequeue(out ulong funcAddress))
      {
          List<Block> blocks = new List<Block>();
          Queue<Block> workQueue = new Queue<Block>();
          Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();

          // Returns the block starting at the given address, creating it and
          // queueing it for decoding if it was not requested before.
          Block GetBlock(ulong blkAddress)
          {
              if (!visited.TryGetValue(blkAddress, out Block block))
              {
                  block = new Block(blkAddress);

                  workQueue.Enqueue(block);
                  visited.Add(blkAddress, block);
              }

              return block;
          }

          GetBlock(funcAddress);

          while (workQueue.TryDequeue(out Block currBlock))
          {
              // Check if the current block is inside another block.
              if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
              {
                  Block nBlock = blocks[nBlkIndex];

                  if (nBlock.Address == currBlock.Address)
                  {
                      throw new InvalidOperationException("Found duplicate block address on the list.");
                  }

                  // The new block starts in the middle of an already decoded one,
                  // so that block is split and the new one is placed right after it.
                  nBlock.Split(currBlock);
                  blocks.Insert(nBlkIndex + 1, currBlock);

                  continue;
              }

              // If we have a block after the current one, set the limit address.
              ulong limitAddress = ulong.MaxValue;

              if (nBlkIndex != blocks.Count)
              {
                  Block nBlock = blocks[nBlkIndex];

                  int nextIndex = nBlkIndex + 1;

                  if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
                  {
                      limitAddress = blocks[nextIndex].Address;
                  }
                  else if (nBlock.Address > currBlock.Address)
                  {
                      limitAddress = blocks[nBlkIndex].Address;
                  }
              }

              FillBlock(gpuAccessor, currBlock, limitAddress, startAddress, out bool blockHasBindless);

              hasBindless |= blockHasBindless;

              if (currBlock.OpCodes.Count != 0)
              {
                  // We should have blocks for all possible branch targets,
                  // including those from SSY/PBK instructions.
                  foreach (OpCodePush pushOp in currBlock.PushOpCodes)
                  {
                      GetBlock(pushOp.GetAbsoluteAddress());
                  }

                  // Set child blocks. "Branch" is the block the branch instruction
                  // points to (when taken), "Next" is the block at the next address,
                  // executed when the branch is not taken. For Unconditional Branches
                  // or end of program, Next is null.
                  OpCode lastOp = currBlock.GetLastOp();

                  if (lastOp is OpCodeBranch opBr)
                  {
                      if (lastOp.Emitter == InstEmit.Cal)
                      {
                          // Call targets are decoded as separate functions.
                          EnqueueFunction(opBr.GetAbsoluteAddress());
                      }
                      else
                      {
                          currBlock.Branch = GetBlock(opBr.GetAbsoluteAddress());
                      }
                  }
                  else if (lastOp is OpCodeBranchIndir opBrIndir)
                  {
                      // An indirect branch could go anywhere, we don't know the target.
                      // Those instructions are usually used on a switch to jump table
                      // compiler optimization, and in those cases the possible targets
                      // seems to be always right after the BRX itself. We can assume
                      // that the possible targets are all the blocks in-between the
                      // instruction right after the BRX, and the common target that
                      // all the "cases" should eventually jump to, acting as the
                      // switch break.
                      Block firstTarget = GetBlock(currBlock.EndAddress);

                      firstTarget.BrIndir = opBrIndir;

                      opBrIndir.PossibleTargets.Add(firstTarget);
                  }

                  if (!IsUnconditionalBranch(lastOp))
                  {
                      currBlock.Next = GetBlock(currBlock.EndAddress);
                  }
              }

              // Insert the new block on the list (sorted by address).
              if (blocks.Count != 0)
              {
                  Block nBlock = blocks[nBlkIndex];

                  blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
              }
              else
              {
                  blocks.Add(currBlock);
              }

              // Do we have a block after the current one?
              if (currBlock.BrIndir != null && HasBlockAfter(gpuAccessor, currBlock, startAddress))
              {
                  // This must be checked before GetBlock, which registers the address as visited.
                  bool targetVisited = visited.ContainsKey(currBlock.EndAddress);

                  Block possibleTarget = GetBlock(currBlock.EndAddress);

                  currBlock.BrIndir.PossibleTargets.Add(possibleTarget);

                  // Only blocks created here carry the indirect branch forward,
                  // so the chain of possible targets keeps growing from them.
                  if (!targetVisited)
                  {
                      possibleTarget.BrIndir = currBlock.BrIndir;
                  }
              }
          }

          // Link every SSY/PBK instruction to the SYNC/BRK instructions that consume its address.
          foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
          {
              for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
              {
                  PropagatePushOp(visited, block, pushOpIndex);
              }
          }

          funcs.Add(blocks.ToArray());
      }

      return funcs.ToArray();
  }
  /// <summary>
  /// Checks if the 8-byte word located right after the block looks like another instruction.
  /// </summary>
  /// <param name="gpuAccessor">Accessor used to query and read the shader memory</param>
  /// <param name="currBlock">Block whose end address is inspected</param>
  /// <param name="startAdddress">Base address added to the block end address</param>
  /// <returns>
  /// False if the first or last byte of the word is not mapped, or if the word
  /// is zero or equal to <see cref="ShaderEndDelimiter"/>; true otherwise
  /// </returns>
  private static bool HasBlockAfter(IGpuAccessor gpuAccessor, Block currBlock, ulong startAdddress)
  {
      ulong nextInstAddress = startAdddress + currBlock.EndAddress;

      // Both ends of the 8-byte word must be mapped before it can be read.
      bool isMapped = gpuAccessor.MemoryMapped(nextInstAddress) &&
                      gpuAccessor.MemoryMapped(nextInstAddress + 7);

      if (!isMapped)
      {
          return false;
      }

      ulong nextInst = gpuAccessor.MemoryRead<ulong>(nextInstAddress);

      bool isEmpty = nextInst == 0UL;
      bool isDelimiter = nextInst == ShaderEndDelimiter;

      return !(isEmpty || isDelimiter);
  }
  /// <summary>
  /// Searches the address-sorted block list for the block that contains a given address.
  /// </summary>
  /// <param name="blocks">List of blocks, sorted by address</param>
  /// <param name="address">Address to look for</param>
  /// <param name="index">
  /// Index of the last block that was inspected (0 if the list is empty).
  /// When the method returns true, this is the index of the containing block
  /// </param>
  /// <returns>True if a block whose [Address, EndAddress) range contains the address was found</returns>
  private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
  {
      index = 0;

      int low = 0;
      int high = blocks.Count - 1;

      while (low <= high)
      {
          int mid = low + ((high - low) >> 1);

          Block candidate = blocks[mid];

          // Always report the last probed position, callers use it as
          // an insertion hint when nothing contains the address.
          index = mid;

          if (address < candidate.Address)
          {
              high = mid - 1;
          }
          else if (address < candidate.EndAddress)
          {
              return true;
          }
          else
          {
              low = mid + 1;
          }
      }

      return false;
  }
  /// <summary>
  /// Decodes instructions into a block, starting at the block address, until a control flow
  /// instruction is decoded or the limit address is reached.
  /// </summary>
  /// <param name="gpuAccessor">Accessor used to read the shader code</param>
  /// <param name="block">Block to be filled; its op codes, end address and push ops are updated</param>
  /// <param name="limitAddress">Address where decoding must stop</param>
  /// <param name="startAddress">Base address added to the instruction offsets when reading memory</param>
  /// <param name="hasBindless">Set to true if any decoded instruction matches a bindless access pattern</param>
  /// <exception cref="ArgumentNullException">The op code table returned an emitter without an op code factory</exception>
  private static void FillBlock(
      IGpuAccessor gpuAccessor,
      Block block,
      ulong limitAddress,
      ulong startAddress,
      out bool hasBindless)
  {
      hasBindless = false;

      ulong address = block.Address;

      // Stop as soon as a full 8-byte instruction no longer fits before the limit.
      while (address + 7 < limitAddress)
      {
          // Scheduling instructions, which are written every 32 bytes, are skipped.
          bool isSchedInst = (address & 0x1f) == 0;

          ulong opAddress = address;

          address += 8;

          if (!isSchedInst)
          {
              long opCode = gpuAccessor.MemoryRead<long>(startAddress + opAddress);

              (InstEmitter emitter, OpCodeTable.MakeOp makeOp) = OpCodeTable.GetEmitter(opCode);

              if (emitter == null)
              {
                  // TODO: Warning, illegal encoding.
                  block.OpCodes.Add(new OpCode(null, opAddress, opCode));
              }
              else
              {
                  if (makeOp == null)
                  {
                      throw new ArgumentNullException(nameof(makeOp));
                  }

                  OpCode op = makeOp(emitter, opAddress, opCode);

                  // Those patterns are checked to figure out the presence of bindless access.
                  bool isBindlessOp =
                      (op is OpCodeImage image && image.IsBindless) ||
                      (op is OpCodeTxd txd && txd.IsBindless) ||
                      (op is OpCodeTld4B) ||
                      (emitter == InstEmit.TexB) ||
                      (emitter == InstEmit.TldB) ||
                      (emitter == InstEmit.TmmlB) ||
                      (emitter == InstEmit.TxqB);

                  hasBindless |= isBindlessOp;

                  block.OpCodes.Add(op);
              }
          }

          // The block ends at the first instruction that changes the control flow.
          if (IsControlFlowChange(block.GetLastOp()))
          {
              break;
          }
      }

      block.EndAddress = address;

      block.UpdatePushOps();
  }
  /// <summary>
  /// Checks if the instruction changes the control flow and is always executed.
  /// </summary>
  /// <param name="opCode">Instruction to check</param>
  /// <returns>True if the instruction is both unconditional and a control flow change</returns>
  private static bool IsUnconditionalBranch(OpCode opCode)
      => IsUnconditional(opCode) && IsControlFlowChange(opCode);
  /// <summary>
  /// Checks if the instruction is always executed.
  /// </summary>
  /// <param name="opCode">Instruction to check</param>
  /// <returns>
  /// False for an exit instruction whose condition is not <see cref="Condition.Always"/>;
  /// otherwise true only when the predicate is the non-inverted "true" predicate
  /// </returns>
  private static bool IsUnconditional(OpCode opCode)
  {
      bool isConditionalExit = opCode is OpCodeExit exitOp && exitOp.Condition != Condition.Always;

      bool hasTruePredicate = opCode.Predicate.Index == RegisterConsts.PredicateTrueIndex;

      return !isConditionalExit && hasTruePredicate && !opCode.InvertPredicate;
  }
  /// <summary>
  /// Checks if the instruction ends a block by changing the control flow.
  /// </summary>
  /// <param name="opCode">Instruction to check</param>
  /// <returns>
  /// True for branches that do not push their target, indirect branches,
  /// branches that pop their target, and exit instructions
  /// </returns>
  private static bool IsControlFlowChange(OpCode opCode)
  {
      // Branch op codes that only push a target address do not transfer control by themselves.
      if (opCode is OpCodeBranch branchOp && !branchOp.PushTarget)
      {
          return true;
      }

      return opCode is OpCodeBranchIndir ||
             opCode is OpCodeBranchPop ||
             opCode is OpCodeExit;
  }
  /// <summary>
  /// Kind of address stored on the branch stack while push/pop instructions are being matched.
  /// A pop instruction can only consume an address of its own kind.
  /// </summary>
  private enum MergeType
  {
      // Address pushed by PBK, consumed by BRK.
      Brk = 0,

      // Address pushed by SSY, consumed by SYNC.
      Sync = 1
  }
  /// <summary>
  /// Entry of the work stack used while propagating push instructions.
  /// It is either a block to be visited, or a "restore" operation that undoes
  /// the changes a visited block made to the branch stack.
  /// </summary>
  /// <remarks>
  /// The state is fixed at construction, so the struct and its fields are read-only.
  /// </remarks>
  private readonly struct PathBlockState
  {
      private enum RestoreType
      {
          None,
          PopPushOp,
          PushBranchOp
      }

      /// <summary>
      /// Block to be visited, or null for restore operations.
      /// </summary>
      public Block Block { get; }

      private readonly RestoreType _restoreType;
      private readonly ulong _restoreValue;
      private readonly MergeType _restoreMergeType;

      /// <summary>
      /// True if this entry is a restore operation rather than a block to visit.
      /// </summary>
      public bool ReturningFromVisit => _restoreType != RestoreType.None;

      /// <summary>
      /// Creates an entry for a block that should be visited.
      /// </summary>
      /// <param name="block">Block to be visited</param>
      public PathBlockState(Block block)
      {
          Block = block;
          _restoreType = RestoreType.None;
          _restoreValue = 0;
          _restoreMergeType = default;
      }

      /// <summary>
      /// Creates a restore operation that pops entries from the branch stack
      /// until it is back at the given size.
      /// </summary>
      /// <param name="oldStackSize">Size the branch stack should be trimmed down to</param>
      public PathBlockState(int oldStackSize)
      {
          Block = null;
          _restoreType = RestoreType.PopPushOp;
          _restoreValue = (ulong)oldStackSize;
          _restoreMergeType = default;
      }

      /// <summary>
      /// Creates a restore operation that pushes an address back into the branch stack.
      /// </summary>
      /// <param name="syncAddress">Address to be pushed back</param>
      /// <param name="mergeType">Kind of the address to be pushed back</param>
      public PathBlockState(ulong syncAddress, MergeType mergeType)
      {
          Block = null;
          _restoreType = RestoreType.PushBranchOp;
          _restoreValue = syncAddress;
          _restoreMergeType = mergeType;
      }

      /// <summary>
      /// Applies the restore operation to the branch stack.
      /// Does nothing if this entry is a block to be visited.
      /// </summary>
      /// <param name="branchStack">Branch stack to be restored</param>
      public void RestoreStackState(Stack<(ulong, MergeType)> branchStack)
      {
          switch (_restoreType)
          {
              case RestoreType.PushBranchOp:
                  branchStack.Push((_restoreValue, _restoreMergeType));
                  break;

              case RestoreType.PopPushOp:
                  // Here the restore value holds the stack size recorded at construction.
                  while (branchStack.Count > (uint)_restoreValue)
                  {
                      branchStack.Pop();
                  }
                  break;
          }
      }
  }
  /// <summary>
  /// Finds the pop instructions (SYNC/BRK) that consume the address pushed by a given push
  /// instruction (SSY/PBK), and links them together.
  /// </summary>
  /// <remarks>
  /// The blocks reachable from <paramref name="currBlock"/> are walked depth first while a stack
  /// of pushed addresses is simulated. Each block is processed at most once per call.
  /// </remarks>
  /// <param name="blocks">Blocks of the function, keyed by address</param>
  /// <param name="currBlock">Block that contains the push instruction</param>
  /// <param name="pushOpIndex">Index of the push instruction inside the block push op codes list</param>
  private static void PropagatePushOp(Dictionary<ulong, Block> blocks, Block currBlock, int pushOpIndex)
  {
      OpCodePush pushOp = currBlock.PushOpCodes[pushOpIndex];

      Stack<PathBlockState> pending = new Stack<PathBlockState>();
      HashSet<Block> processed = new HashSet<Block>();
      Stack<(ulong, MergeType)> mergeStack = new Stack<(ulong, MergeType)>();

      // Entries without a block are restore operations. They undo what a block did to
      // the merge stack once we return from it: addresses pushed by SSY/PBK are popped,
      // and addresses popped by SYNC/BRK are pushed back.
      // Blocks that were already processed are dropped here, otherwise we
      // could go around in circles and get stuck.
      void Schedule(PathBlockState state)
      {
          if (state.Block != null && processed.Contains(state.Block))
          {
              return;
          }

          pending.Push(state);
      }

      // On the starting block, only the push instructions from the requested one onwards
      // are simulated. On all other blocks, every push instruction is simulated.
      int firstPushOpIndex = pushOpIndex;

      Schedule(new PathBlockState(currBlock));

      while (pending.TryPop(out PathBlockState state))
      {
          if (state.ReturningFromVisit)
          {
              state.RestoreStackState(mergeStack);

              continue;
          }

          Block current = state.Block;

          // A block might have been scheduled more than once before it was processed.
          // Processing it again would schedule the same children over and over,
          // until memory is exhausted.
          if (!processed.Add(current))
          {
              continue;
          }

          int pushOpsCount = current.PushOpCodes.Count;

          if (pushOpsCount != 0)
          {
              // Scheduled first, so that it runs after everything reachable from this block.
              Schedule(new PathBlockState(mergeStack.Count));

              for (int index = firstPushOpIndex; index < pushOpsCount; index++)
              {
                  OpCodePush currentPushOp = current.PushOpCodes[index];

                  MergeType pushMergeType = MergeType.Brk;

                  if (currentPushOp.Emitter == InstEmit.Ssy)
                  {
                      pushMergeType = MergeType.Sync;
                  }

                  mergeStack.Push((currentPushOp.GetAbsoluteAddress(), pushMergeType));
              }
          }

          firstPushOpIndex = 0;

          if (current.Next != null)
          {
              Schedule(new PathBlockState(current.Next));
          }

          if (current.Branch != null)
          {
              Schedule(new PathBlockState(current.Branch));
          }
          else if (current.GetLastOp() is OpCodeBranchIndir brIndir)
          {
              // The work list is a LIFO, so scheduling the targets in descending
              // address order makes them be processed in ascending address order.
              foreach (Block possibleTarget in brIndir.PossibleTargets.OrderByDescending(x => x.Address))
              {
                  Schedule(new PathBlockState(possibleTarget));
              }
          }
          else if (current.GetLastOp() is OpCodeBranchPop popOp)
          {
              MergeType popMergeType = popOp.Emitter == InstEmit.Sync ? MergeType.Sync : MergeType.Brk;

              bool matched = false;

              ulong targetAddress = 0UL;

              // Pop addresses until one of the kind this instruction consumes is found.
              // The push and pop instruction types must match, so:
              // - BRK can only consume addresses pushed by PBK.
              // - SYNC can only consume addresses pushed by SSY.
              while (!matched && mergeStack.Count != 0)
              {
                  MergeType poppedMergeType;

                  (targetAddress, poppedMergeType) = mergeStack.Pop();

                  // Every popped address is pushed back when we return from this block.
                  Schedule(new PathBlockState(targetAddress, poppedMergeType));

                  matched = poppedMergeType == popMergeType;
              }

              if (!matched)
              {
                  continue;
              }

              if (mergeStack.Count == 0)
              {
                  // The entire stack was consumed, which means that this pop
                  // instruction consumed the address from our push instruction.
                  popOp.Targets.Add(pushOp, popOp.Targets.Count);

                  pushOp.PopOps.TryAdd(popOp, Local());
              }
              else
              {
                  // The address belongs to another push instruction,
                  // keep following the path from the target block.
                  Schedule(new PathBlockState(blocks[targetAddress]));
              }
          }
      }
  }
  414. }
  415. }