// Decoder.cs
  1. using Ryujinx.Graphics.Shader.Instructions;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Linq;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. namespace Ryujinx.Graphics.Shader.Decoders
  7. {
  8. static class Decoder
  9. {
/// <summary>
/// Decodes the shader program at the given address into per-function arrays of basic blocks.
/// </summary>
/// <param name="gpuAccessor">Accessor used to read shader code from guest memory</param>
/// <param name="startAddress">Base address of the shader program</param>
/// <param name="hasBindless">Set to true if any decoded instruction performs a bindless texture access</param>
/// <returns>One array of blocks per function; index 0 is the function starting at offset 0 (the entry point)</returns>
public static Block[][] Decode(IGpuAccessor gpuAccessor, ulong startAddress, out bool hasBindless)
{
    hasBindless = false;

    List<Block[]> funcs = new List<Block[]>();

    // Functions (CAL targets) are decoded one at a time; funcVisited ensures
    // each function address is only decoded once.
    Queue<ulong> funcQueue = new Queue<ulong>();
    HashSet<ulong> funcVisited = new HashSet<ulong>();

    void EnqueueFunction(ulong funcAddress)
    {
        if (funcVisited.Add(funcAddress))
        {
            funcQueue.Enqueue(funcAddress);
        }
    }

    // The entry point is always at offset 0.
    funcQueue.Enqueue(0);

    while (funcQueue.TryDequeue(out ulong funcAddress))
    {
        List<Block> blocks = new List<Block>();

        Queue<Block> workQueue = new Queue<Block>();

        Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();

        // Gets the block starting at the given address, creating and queueing
        // it for decoding if it was not seen before.
        Block GetBlock(ulong blkAddress)
        {
            if (!visited.TryGetValue(blkAddress, out Block block))
            {
                block = new Block(blkAddress);

                workQueue.Enqueue(block);

                visited.Add(blkAddress, block);
            }

            return block;
        }

        GetBlock(funcAddress);

        while (workQueue.TryDequeue(out Block currBlock))
        {
            // Check if the current block is inside another block.
            if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
            {
                Block nBlock = blocks[nBlkIndex];

                if (nBlock.Address == currBlock.Address)
                {
                    throw new InvalidOperationException("Found duplicate block address on the list.");
                }

                // The new block starts in the middle of an existing one:
                // split the existing block at the new block's start address.
                nBlock.Split(currBlock);
                blocks.Insert(nBlkIndex + 1, currBlock);

                continue;
            }

            // If we have a block after the current one, set the limit address.
            // nBlkIndex is the insertion hint left by the failed binary search.
            ulong limitAddress = ulong.MaxValue;

            if (nBlkIndex != blocks.Count)
            {
                Block nBlock = blocks[nBlkIndex];

                int nextIndex = nBlkIndex + 1;

                if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
                {
                    limitAddress = blocks[nextIndex].Address;
                }
                else if (nBlock.Address > currBlock.Address)
                {
                    limitAddress = blocks[nBlkIndex].Address;
                }
            }

            FillBlock(gpuAccessor, currBlock, limitAddress, startAddress, out bool blockHasBindless);
            hasBindless |= blockHasBindless;

            if (currBlock.OpCodes.Count != 0)
            {
                // We should have blocks for all possible branch targets,
                // including those from SSY/PBK instructions.
                foreach (OpCodePush pushOp in currBlock.PushOpCodes)
                {
                    GetBlock(pushOp.GetAbsoluteAddress());
                }

                // Set child blocks. "Branch" is the block the branch instruction
                // points to (when taken), "Next" is the block at the next address,
                // executed when the branch is not taken. For Unconditional Branches
                // or end of program, Next is null.
                OpCode lastOp = currBlock.GetLastOp();

                if (lastOp is OpCodeBranch opBr)
                {
                    if (lastOp.Emitter == InstEmit.Cal)
                    {
                        // Calls are decoded as separate functions rather than
                        // being inlined into this function's graph.
                        EnqueueFunction(opBr.GetAbsoluteAddress());
                    }
                    else
                    {
                        currBlock.Branch = GetBlock(opBr.GetAbsoluteAddress());
                    }
                }
                else if (lastOp is OpCodeBranchIndir opBrIndir)
                {
                    // An indirect branch could go anywhere, we don't know the target.
                    // Those instructions are usually used on a switch to jump table
                    // compiler optimization, and in those cases the possible targets
                    // seems to be always right after the BRX itself. We can assume
                    // that the possible targets are all the blocks in-between the
                    // instruction right after the BRX, and the common target that
                    // all the "cases" should eventually jump to, acting as the
                    // switch break.
                    Block firstTarget = GetBlock(currBlock.EndAddress);

                    firstTarget.BrIndir = opBrIndir;

                    opBrIndir.PossibleTargets.Add(firstTarget);
                }

                if (!IsUnconditionalBranch(lastOp))
                {
                    currBlock.Next = GetBlock(currBlock.EndAddress);
                }
            }

            // Insert the new block on the list (sorted by address).
            if (blocks.Count != 0)
            {
                Block nBlock = blocks[nBlkIndex];

                blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
            }
            else
            {
                blocks.Add(currBlock);
            }

            // Do we have a block after the current one?
            if (currBlock.BrIndir != null && HasBlockAfter(gpuAccessor, currBlock, startAddress))
            {
                bool targetVisited = visited.ContainsKey(currBlock.EndAddress);

                // The block right after this one is another possible target of the
                // pending indirect branch (see the BRX comment above).
                Block possibleTarget = GetBlock(currBlock.EndAddress);

                currBlock.BrIndir.PossibleTargets.Add(possibleTarget);

                if (!targetVisited)
                {
                    // Propagate the BRX forward so the chain of possible targets
                    // keeps growing while more blocks follow.
                    possibleTarget.BrIndir = currBlock.BrIndir;
                }
            }
        }

        // Resolve, for every SSY/PBK push, which SYNC/BRK instructions consume it.
        foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
        {
            for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
            {
                PropagatePushOp(visited, block, pushOpIndex);
            }
        }

        funcs.Add(blocks.ToArray());
    }

    return funcs.ToArray();
}
  147. private static bool HasBlockAfter(IGpuAccessor gpuAccessor, Block currBlock, ulong startAdddress)
  148. {
  149. if (!gpuAccessor.MemoryMapped(startAdddress + currBlock.EndAddress) ||
  150. !gpuAccessor.MemoryMapped(startAdddress + currBlock.EndAddress + 7))
  151. {
  152. return false;
  153. }
  154. ulong inst = gpuAccessor.MemoryRead<ulong>(startAdddress + currBlock.EndAddress);
  155. return inst != 0UL;
  156. }
  157. private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
  158. {
  159. index = 0;
  160. int left = 0;
  161. int right = blocks.Count - 1;
  162. while (left <= right)
  163. {
  164. int size = right - left;
  165. int middle = left + (size >> 1);
  166. Block block = blocks[middle];
  167. index = middle;
  168. if (address >= block.Address && address < block.EndAddress)
  169. {
  170. return true;
  171. }
  172. if (address < block.Address)
  173. {
  174. right = middle - 1;
  175. }
  176. else
  177. {
  178. left = middle + 1;
  179. }
  180. }
  181. return false;
  182. }
/// <summary>
/// Fills a block with opcodes, decoding sequentially from the block start address until a
/// control flow change or the limit address is reached.
/// </summary>
/// <param name="gpuAccessor">Accessor used to read shader code from guest memory</param>
/// <param name="block">Block to be filled; its EndAddress and push ops are updated</param>
/// <param name="limitAddress">Start address of the next known block; decoding stops before it</param>
/// <param name="startAddress">Base address of the shader program</param>
/// <param name="hasBindless">Set to true if any decoded instruction performs a bindless texture access</param>
private static void FillBlock(
    IGpuAccessor gpuAccessor,
    Block block,
    ulong limitAddress,
    ulong startAddress,
    out bool hasBindless)
{
    ulong address = block.Address;

    hasBindless = false;

    do
    {
        // Never decode past the start of the next known block.
        if (address + 7 >= limitAddress)
        {
            break;
        }

        // Ignore scheduling instructions, which are written every 32 bytes.
        if ((address & 0x1f) == 0)
        {
            address += 8;

            continue;
        }

        ulong opAddress = address;

        address += 8;

        long opCode = gpuAccessor.MemoryRead<long>(startAddress + opAddress);

        (InstEmitter emitter, OpCodeTable.MakeOp makeOp) = OpCodeTable.GetEmitter(opCode);

        if (emitter == null)
        {
            // TODO: Warning, illegal encoding.
            // Keep a placeholder opcode so addresses stay consistent.
            block.OpCodes.Add(new OpCode(null, opAddress, opCode));

            continue;
        }

        if (makeOp == null)
        {
            throw new ArgumentNullException(nameof(makeOp));
        }

        OpCode op = makeOp(emitter, opAddress, opCode);

        // We check these patterns to figure out the presence of bindless access
        hasBindless |= (op is OpCodeImage image && image.IsBindless) ||
                       (op is OpCodeTxd txd && txd.IsBindless) ||
                       (op is OpCodeTld4B) ||
                       (emitter == InstEmit.TexB) ||
                       (emitter == InstEmit.TldB) ||
                       (emitter == InstEmit.TmmlB) ||
                       (emitter == InstEmit.TxqB);

        block.OpCodes.Add(op);
    }
    while (!IsControlFlowChange(block.GetLastOp()));

    block.EndAddress = address;

    block.UpdatePushOps();
}
  233. private static bool IsUnconditionalBranch(OpCode opCode)
  234. {
  235. return IsUnconditional(opCode) && IsControlFlowChange(opCode);
  236. }
  237. private static bool IsUnconditional(OpCode opCode)
  238. {
  239. if (opCode is OpCodeExit op && op.Condition != Condition.Always)
  240. {
  241. return false;
  242. }
  243. return opCode.Predicate.Index == RegisterConsts.PredicateTrueIndex && !opCode.InvertPredicate;
  244. }
  245. private static bool IsControlFlowChange(OpCode opCode)
  246. {
  247. return (opCode is OpCodeBranch opBranch && !opBranch.PushTarget) ||
  248. opCode is OpCodeBranchIndir ||
  249. opCode is OpCodeBranchPop ||
  250. opCode is OpCodeExit;
  251. }
  252. private struct PathBlockState
  253. {
  254. public Block Block { get; }
  255. private enum RestoreType
  256. {
  257. None,
  258. PopPushOp,
  259. PushBranchOp
  260. }
  261. private RestoreType _restoreType;
  262. private ulong _restoreValue;
  263. public bool ReturningFromVisit => _restoreType != RestoreType.None;
  264. public PathBlockState(Block block)
  265. {
  266. Block = block;
  267. _restoreType = RestoreType.None;
  268. _restoreValue = 0;
  269. }
  270. public PathBlockState(int oldStackSize)
  271. {
  272. Block = null;
  273. _restoreType = RestoreType.PopPushOp;
  274. _restoreValue = (ulong)oldStackSize;
  275. }
  276. public PathBlockState(ulong syncAddress)
  277. {
  278. Block = null;
  279. _restoreType = RestoreType.PushBranchOp;
  280. _restoreValue = syncAddress;
  281. }
  282. public void RestoreStackState(Stack<ulong> branchStack)
  283. {
  284. if (_restoreType == RestoreType.PushBranchOp)
  285. {
  286. branchStack.Push(_restoreValue);
  287. }
  288. else if (_restoreType == RestoreType.PopPushOp)
  289. {
  290. while (branchStack.Count > (uint)_restoreValue)
  291. {
  292. branchStack.Pop();
  293. }
  294. }
  295. }
  296. }
/// <summary>
/// Walks all paths reachable from the block containing a SSY/PBK push instruction,
/// simulating the push/pop address stack, and links the push op to every SYNC/BRK
/// instruction that consumes its address.
/// </summary>
/// <param name="blocks">All decoded blocks of the function, keyed by start address</param>
/// <param name="currBlock">Block containing the push operation being propagated</param>
/// <param name="pushOpIndex">Index of the push operation inside the block's push op list</param>
private static void PropagatePushOp(Dictionary<ulong, Block> blocks, Block currBlock, int pushOpIndex)
{
    OpCodePush pushOp = currBlock.PushOpCodes[pushOpIndex];

    // Depth-first traversal stack; branchStack mirrors the SSY/PBK address
    // stack along the path currently being explored.
    Stack<PathBlockState> workQueue = new Stack<PathBlockState>();

    HashSet<Block> visited = new HashSet<Block>();

    Stack<ulong> branchStack = new Stack<ulong>();

    void Push(PathBlockState pbs)
    {
        // When block is null, this means we are pushing a restore operation.
        // Restore operations are used to undo the work done inside a block
        // when we return from it, for example it pops addresses pushed by
        // SSY/PBK instructions inside the block, and pushes addresses poped
        // by SYNC/BRK.
        // For blocks, if it's already visited, we just ignore to avoid going
        // around in circles and getting stuck here.
        if (pbs.Block == null || !visited.Contains(pbs.Block))
        {
            workQueue.Push(pbs);
        }
    }

    Push(new PathBlockState(currBlock));

    while (workQueue.TryPop(out PathBlockState pbs))
    {
        if (pbs.ReturningFromVisit)
        {
            pbs.RestoreStackState(branchStack);

            continue;
        }

        Block current = pbs.Block;

        // If the block was already processed, we just ignore it, otherwise
        // we would push the same child blocks of an already processed block,
        // and go around in circles until memory is exhausted.
        if (!visited.Add(current))
        {
            continue;
        }

        int pushOpsCount = current.PushOpCodes.Count;

        if (pushOpsCount != 0)
        {
            // Queue a restore entry recording the current stack size; being on a
            // LIFO, it runs after this block's descendants were explored and trims
            // the stack back down to that size.
            Push(new PathBlockState(branchStack.Count));

            for (int index = pushOpIndex; index < pushOpsCount; index++)
            {
                branchStack.Push(current.PushOpCodes[index].GetAbsoluteAddress());
            }
        }

        // Only the starting block begins at pushOpIndex; all blocks visited
        // afterwards simulate their push ops from the beginning.
        pushOpIndex = 0;

        if (current.Next != null)
        {
            Push(new PathBlockState(current.Next));
        }

        if (current.Branch != null)
        {
            Push(new PathBlockState(current.Branch));
        }
        else if (current.GetLastOp() is OpCodeBranchIndir brIndir)
        {
            // By adding them in descending order (sorted by address), we process the blocks
            // in order (of ascending address), since we work with a LIFO.
            foreach (Block possibleTarget in brIndir.PossibleTargets.OrderByDescending(x => x.Address))
            {
                Push(new PathBlockState(possibleTarget));
            }
        }
        else if (current.GetLastOp() is OpCodeBranchPop op)
        {
            ulong targetAddress = branchStack.Pop();

            if (branchStack.Count == 0)
            {
                // Stack bottom reached: this pop consumes the address of the push
                // op being propagated, so record the push/pop pairing.
                branchStack.Push(targetAddress);

                op.Targets.Add(pushOp, op.Targets.Count);

                pushOp.PopOps.TryAdd(op, Local());
            }
            else
            {
                // First we push the target address (this will be used to push the
                // address back into the SSY/PBK stack when we return from that block),
                // then we push the block itself into the work "queue" (well, it's a stack)
                // for processing.
                Push(new PathBlockState(targetAddress));
                Push(new PathBlockState(blocks[targetAddress]));
            }
        }
    }
}
  381. }
  382. }