// Decoder.cs
  1. using Ryujinx.Graphics.Shader.Instructions;
  2. using Ryujinx.Graphics.Shader.Translation;
  3. using System;
  4. using System.Collections.Generic;
  5. using System.Linq;
  6. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  7. namespace Ryujinx.Graphics.Shader.Decoders
  8. {
  9. static class Decoder
  10. {
  11. public static Block[][] Decode(ShaderConfig config, ulong startAddress)
  12. {
  13. List<Block[]> funcs = new List<Block[]>();
  14. Queue<ulong> funcQueue = new Queue<ulong>();
  15. HashSet<ulong> funcVisited = new HashSet<ulong>();
  16. void EnqueueFunction(ulong funcAddress)
  17. {
  18. if (funcVisited.Add(funcAddress))
  19. {
  20. funcQueue.Enqueue(funcAddress);
  21. }
  22. }
  23. funcQueue.Enqueue(0);
  24. while (funcQueue.TryDequeue(out ulong funcAddress))
  25. {
  26. List<Block> blocks = new List<Block>();
  27. Queue<Block> workQueue = new Queue<Block>();
  28. Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();
  29. Block GetBlock(ulong blkAddress)
  30. {
  31. if (!visited.TryGetValue(blkAddress, out Block block))
  32. {
  33. block = new Block(blkAddress);
  34. workQueue.Enqueue(block);
  35. visited.Add(blkAddress, block);
  36. }
  37. return block;
  38. }
  39. GetBlock(funcAddress);
  40. bool hasNewTarget;
  41. do
  42. {
  43. while (workQueue.TryDequeue(out Block currBlock))
  44. {
  45. // Check if the current block is inside another block.
  46. if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
  47. {
  48. Block nBlock = blocks[nBlkIndex];
  49. if (nBlock.Address == currBlock.Address)
  50. {
  51. throw new InvalidOperationException("Found duplicate block address on the list.");
  52. }
  53. nBlock.Split(currBlock);
  54. blocks.Insert(nBlkIndex + 1, currBlock);
  55. continue;
  56. }
  57. // If we have a block after the current one, set the limit address.
  58. ulong limitAddress = ulong.MaxValue;
  59. if (nBlkIndex != blocks.Count)
  60. {
  61. Block nBlock = blocks[nBlkIndex];
  62. int nextIndex = nBlkIndex + 1;
  63. if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
  64. {
  65. limitAddress = blocks[nextIndex].Address;
  66. }
  67. else if (nBlock.Address > currBlock.Address)
  68. {
  69. limitAddress = blocks[nBlkIndex].Address;
  70. }
  71. }
  72. FillBlock(config, currBlock, limitAddress, startAddress);
  73. if (currBlock.OpCodes.Count != 0)
  74. {
  75. // We should have blocks for all possible branch targets,
  76. // including those from SSY/PBK instructions.
  77. foreach (OpCodePush pushOp in currBlock.PushOpCodes)
  78. {
  79. GetBlock(pushOp.GetAbsoluteAddress());
  80. }
  81. // Set child blocks. "Branch" is the block the branch instruction
  82. // points to (when taken), "Next" is the block at the next address,
  83. // executed when the branch is not taken. For Unconditional Branches
  84. // or end of program, Next is null.
  85. OpCode lastOp = currBlock.GetLastOp();
  86. if (lastOp is OpCodeBranch opBr)
  87. {
  88. if (lastOp.Emitter == InstEmit.Cal)
  89. {
  90. EnqueueFunction(opBr.GetAbsoluteAddress());
  91. }
  92. else
  93. {
  94. currBlock.Branch = GetBlock(opBr.GetAbsoluteAddress());
  95. }
  96. }
  97. if (!IsUnconditionalBranch(lastOp))
  98. {
  99. currBlock.Next = GetBlock(currBlock.EndAddress);
  100. }
  101. }
  102. // Insert the new block on the list (sorted by address).
  103. if (blocks.Count != 0)
  104. {
  105. Block nBlock = blocks[nBlkIndex];
  106. blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
  107. }
  108. else
  109. {
  110. blocks.Add(currBlock);
  111. }
  112. }
  113. // Propagate SSY/PBK addresses into their uses (SYNC/BRK).
  114. foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
  115. {
  116. for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
  117. {
  118. PropagatePushOp(visited, block, pushOpIndex);
  119. }
  120. }
  121. // Try to find target for BRX (indirect branch) instructions.
  122. hasNewTarget = false;
  123. foreach (Block block in blocks)
  124. {
  125. if (block.GetLastOp() is OpCodeBranchIndir opBrIndir && opBrIndir.PossibleTargets.Count == 0)
  126. {
  127. ulong baseOffset = opBrIndir.Address + 8 + (ulong)opBrIndir.Offset;
  128. // An indirect branch could go anywhere,
  129. // try to get the possible target offsets from the constant buffer.
  130. (int cbBaseOffset, int cbOffsetsCount) = FindBrxTargetRange(block, opBrIndir.Ra.Index);
  131. if (cbOffsetsCount != 0)
  132. {
  133. hasNewTarget = true;
  134. }
  135. for (int i = 0; i < cbOffsetsCount; i++)
  136. {
  137. uint targetOffset = config.GpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4);
  138. Block target = GetBlock(baseOffset + targetOffset);
  139. opBrIndir.PossibleTargets.Add(target);
  140. target.Predecessors.Add(block);
  141. }
  142. }
  143. }
  144. // If we discovered new branch targets from the BRX instruction,
  145. // we need another round of decoding to decode the new blocks.
  146. // Additionally, we may have more SSY/PBK targets to propagate,
  147. // and new BRX instructions.
  148. }
  149. while (hasNewTarget);
  150. funcs.Add(blocks.ToArray());
  151. }
  152. return funcs.ToArray();
  153. }
  154. private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
  155. {
  156. index = 0;
  157. int left = 0;
  158. int right = blocks.Count - 1;
  159. while (left <= right)
  160. {
  161. int size = right - left;
  162. int middle = left + (size >> 1);
  163. Block block = blocks[middle];
  164. index = middle;
  165. if (address >= block.Address && address < block.EndAddress)
  166. {
  167. return true;
  168. }
  169. if (address < block.Address)
  170. {
  171. right = middle - 1;
  172. }
  173. else
  174. {
  175. left = middle + 1;
  176. }
  177. }
  178. return false;
  179. }
  180. private static void FillBlock(ShaderConfig config, Block block, ulong limitAddress, ulong startAddress)
  181. {
  182. IGpuAccessor gpuAccessor = config.GpuAccessor;
  183. ulong address = block.Address;
  184. do
  185. {
  186. if (address + 7 >= limitAddress)
  187. {
  188. break;
  189. }
  190. // Ignore scheduling instructions, which are written every 32 bytes.
  191. if ((address & 0x1f) == 0)
  192. {
  193. address += 8;
  194. continue;
  195. }
  196. ulong opAddress = address;
  197. address += 8;
  198. long opCode = gpuAccessor.MemoryRead<long>(startAddress + opAddress);
  199. (InstEmitter emitter, OpCodeTable.MakeOp makeOp) = OpCodeTable.GetEmitter(opCode);
  200. if (emitter == null)
  201. {
  202. // TODO: Warning, illegal encoding.
  203. block.OpCodes.Add(new OpCode(null, opAddress, opCode));
  204. continue;
  205. }
  206. if (makeOp == null)
  207. {
  208. throw new ArgumentNullException(nameof(makeOp));
  209. }
  210. OpCode op = makeOp(emitter, opAddress, opCode);
  211. // We check these patterns to figure out the presence of bindless access
  212. if ((op is OpCodeImage image && image.IsBindless) ||
  213. (op is OpCodeTxd txd && txd.IsBindless) ||
  214. (op is OpCodeTld4B) ||
  215. (emitter == InstEmit.TexB) ||
  216. (emitter == InstEmit.TldB) ||
  217. (emitter == InstEmit.TmmlB) ||
  218. (emitter == InstEmit.TxqB))
  219. {
  220. config.SetUsedFeature(FeatureFlags.Bindless);
  221. }
  222. // Populate used attributes.
  223. if (op is IOpCodeAttribute opAttr)
  224. {
  225. SetUserAttributeUses(config, opAttr);
  226. }
  227. block.OpCodes.Add(op);
  228. }
  229. while (!IsControlFlowChange(block.GetLastOp()));
  230. block.EndAddress = address;
  231. block.UpdatePushOps();
  232. }
  233. private static void SetUserAttributeUses(ShaderConfig config, IOpCodeAttribute opAttr)
  234. {
  235. if (opAttr.Indexed)
  236. {
  237. if (opAttr.Emitter == InstEmit.Ast)
  238. {
  239. config.SetAllOutputUserAttributes();
  240. }
  241. else
  242. {
  243. config.SetAllInputUserAttributes();
  244. }
  245. }
  246. else
  247. {
  248. for (int elemIndex = 0; elemIndex < opAttr.Count; elemIndex++)
  249. {
  250. int attr = opAttr.AttributeOffset + elemIndex * 4;
  251. if (attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd)
  252. {
  253. int index = (attr - AttributeConsts.UserAttributeBase) / 16;
  254. if (opAttr.Emitter == InstEmit.Ast)
  255. {
  256. config.SetOutputUserAttribute(index);
  257. }
  258. else
  259. {
  260. config.SetInputUserAttribute(index);
  261. }
  262. }
  263. }
  264. }
  265. }
  266. private static bool IsUnconditionalBranch(OpCode opCode)
  267. {
  268. return IsUnconditional(opCode) && IsControlFlowChange(opCode);
  269. }
  270. private static bool IsUnconditional(OpCode opCode)
  271. {
  272. if (opCode is OpCodeExit op && op.Condition != Condition.Always)
  273. {
  274. return false;
  275. }
  276. return opCode.Predicate.Index == RegisterConsts.PredicateTrueIndex && !opCode.InvertPredicate;
  277. }
  278. private static bool IsControlFlowChange(OpCode opCode)
  279. {
  280. return (opCode is OpCodeBranch opBranch && !opBranch.PushTarget) ||
  281. opCode is OpCodeBranchIndir ||
  282. opCode is OpCodeBranchPop ||
  283. opCode is OpCodeExit;
  284. }
  285. private static (int, int) FindBrxTargetRange(Block block, int brxReg)
  286. {
  287. // Try to match the following pattern:
  288. //
  289. // IMNMX.U32 Rx, Rx, UpperBound, PT
  290. // SHL Rx, Rx, 0x2
  291. // LDC Rx, c[0x1][Rx+BaseOffset]
  292. //
  293. // Here, Rx is an arbitrary register, "UpperBound" and "BaseOffset" are constants.
  294. // The above pattern is assumed to be generated by the compiler before BRX,
  295. // as the instruction is usually used to implement jump tables for switch statement optimizations.
  296. // On a successful match, "BaseOffset" is the offset in bytes where the jump offsets are
  297. // located on the constant buffer, and "UpperBound" is the total number of offsets for the BRX, minus 1.
  298. HashSet<Block> visited = new HashSet<Block>();
  299. var ldcLocation = FindFirstRegWrite(visited, new BlockLocation(block, block.OpCodes.Count - 1), brxReg);
  300. if (ldcLocation.Block == null || ldcLocation.Block.OpCodes[ldcLocation.Index] is not OpCodeLdc opLdc)
  301. {
  302. return (0, 0);
  303. }
  304. if (opLdc.Slot != 1 || opLdc.IndexMode != CbIndexMode.Default)
  305. {
  306. return (0, 0);
  307. }
  308. var shlLocation = FindFirstRegWrite(visited, ldcLocation, opLdc.Ra.Index);
  309. if (shlLocation.Block == null || shlLocation.Block.OpCodes[shlLocation.Index] is not OpCodeAluImm opShl)
  310. {
  311. return (0, 0);
  312. }
  313. if (opShl.Emitter != InstEmit.Shl || opShl.Immediate != 2)
  314. {
  315. return (0, 0);
  316. }
  317. var imnmxLocation = FindFirstRegWrite(visited, shlLocation, opShl.Ra.Index);
  318. if (imnmxLocation.Block == null || imnmxLocation.Block.OpCodes[imnmxLocation.Index] is not OpCodeAluImm opImnmx)
  319. {
  320. return (0, 0);
  321. }
  322. bool isImnmxS32 = opImnmx.RawOpCode.Extract(48);
  323. if (opImnmx.Emitter != InstEmit.Imnmx || isImnmxS32 || !opImnmx.Predicate39.IsPT || opImnmx.InvertP)
  324. {
  325. return (0, 0);
  326. }
  327. return (opLdc.Offset, opImnmx.Immediate + 1);
  328. }
  329. private struct BlockLocation
  330. {
  331. public Block Block { get; }
  332. public int Index { get; }
  333. public BlockLocation(Block block, int index)
  334. {
  335. Block = block;
  336. Index = index;
  337. }
  338. }
  339. private static BlockLocation FindFirstRegWrite(HashSet<Block> visited, BlockLocation location, int regIndex)
  340. {
  341. Queue<BlockLocation> toVisit = new Queue<BlockLocation>();
  342. toVisit.Enqueue(location);
  343. visited.Add(location.Block);
  344. while (toVisit.TryDequeue(out var currentLocation))
  345. {
  346. Block block = currentLocation.Block;
  347. for (int i = currentLocation.Index - 1; i >= 0; i--)
  348. {
  349. if (WritesToRegister(block.OpCodes[i], regIndex))
  350. {
  351. return new BlockLocation(block, i);
  352. }
  353. }
  354. foreach (Block predecessor in block.Predecessors)
  355. {
  356. if (visited.Add(predecessor))
  357. {
  358. toVisit.Enqueue(new BlockLocation(predecessor, predecessor.OpCodes.Count));
  359. }
  360. }
  361. }
  362. return new BlockLocation(null, 0);
  363. }
  364. private static bool WritesToRegister(OpCode opCode, int regIndex)
  365. {
  366. // Predicate instruction only ever writes to predicate, so we shouldn't check those.
  367. if (opCode.Emitter == InstEmit.Fsetp ||
  368. opCode.Emitter == InstEmit.Hsetp2 ||
  369. opCode.Emitter == InstEmit.Isetp ||
  370. opCode.Emitter == InstEmit.R2p)
  371. {
  372. return false;
  373. }
  374. return opCode is IOpCodeRd opRd && opRd.Rd.Index == regIndex;
  375. }
  376. private enum MergeType
  377. {
  378. Brk = 0,
  379. Sync = 1
  380. }
  381. private struct PathBlockState
  382. {
  383. public Block Block { get; }
  384. private enum RestoreType
  385. {
  386. None,
  387. PopPushOp,
  388. PushBranchOp
  389. }
  390. private RestoreType _restoreType;
  391. private ulong _restoreValue;
  392. private MergeType _restoreMergeType;
  393. public bool ReturningFromVisit => _restoreType != RestoreType.None;
  394. public PathBlockState(Block block)
  395. {
  396. Block = block;
  397. _restoreType = RestoreType.None;
  398. _restoreValue = 0;
  399. _restoreMergeType = default;
  400. }
  401. public PathBlockState(int oldStackSize)
  402. {
  403. Block = null;
  404. _restoreType = RestoreType.PopPushOp;
  405. _restoreValue = (ulong)oldStackSize;
  406. _restoreMergeType = default;
  407. }
  408. public PathBlockState(ulong syncAddress, MergeType mergeType)
  409. {
  410. Block = null;
  411. _restoreType = RestoreType.PushBranchOp;
  412. _restoreValue = syncAddress;
  413. _restoreMergeType = mergeType;
  414. }
  415. public void RestoreStackState(Stack<(ulong, MergeType)> branchStack)
  416. {
  417. if (_restoreType == RestoreType.PushBranchOp)
  418. {
  419. branchStack.Push((_restoreValue, _restoreMergeType));
  420. }
  421. else if (_restoreType == RestoreType.PopPushOp)
  422. {
  423. while (branchStack.Count > (uint)_restoreValue)
  424. {
  425. branchStack.Pop();
  426. }
  427. }
  428. }
  429. }
  430. private static void PropagatePushOp(Dictionary<ulong, Block> blocks, Block currBlock, int pushOpIndex)
  431. {
  432. OpCodePush pushOp = currBlock.PushOpCodes[pushOpIndex];
  433. Block target = blocks[pushOp.GetAbsoluteAddress()];
  434. Stack<PathBlockState> workQueue = new Stack<PathBlockState>();
  435. HashSet<Block> visited = new HashSet<Block>();
  436. Stack<(ulong, MergeType)> branchStack = new Stack<(ulong, MergeType)>();
  437. void Push(PathBlockState pbs)
  438. {
  439. // When block is null, this means we are pushing a restore operation.
  440. // Restore operations are used to undo the work done inside a block
  441. // when we return from it, for example it pops addresses pushed by
  442. // SSY/PBK instructions inside the block, and pushes addresses poped
  443. // by SYNC/BRK.
  444. // For blocks, if it's already visited, we just ignore to avoid going
  445. // around in circles and getting stuck here.
  446. if (pbs.Block == null || !visited.Contains(pbs.Block))
  447. {
  448. workQueue.Push(pbs);
  449. }
  450. }
  451. Push(new PathBlockState(currBlock));
  452. while (workQueue.TryPop(out PathBlockState pbs))
  453. {
  454. if (pbs.ReturningFromVisit)
  455. {
  456. pbs.RestoreStackState(branchStack);
  457. continue;
  458. }
  459. Block current = pbs.Block;
  460. // If the block was already processed, we just ignore it, otherwise
  461. // we would push the same child blocks of an already processed block,
  462. // and go around in circles until memory is exhausted.
  463. if (!visited.Add(current))
  464. {
  465. continue;
  466. }
  467. int pushOpsCount = current.PushOpCodes.Count;
  468. if (pushOpsCount != 0)
  469. {
  470. Push(new PathBlockState(branchStack.Count));
  471. for (int index = pushOpIndex; index < pushOpsCount; index++)
  472. {
  473. OpCodePush currentPushOp = current.PushOpCodes[index];
  474. MergeType pushMergeType = currentPushOp.Emitter == InstEmit.Ssy ? MergeType.Sync : MergeType.Brk;
  475. branchStack.Push((currentPushOp.GetAbsoluteAddress(), pushMergeType));
  476. }
  477. }
  478. pushOpIndex = 0;
  479. if (current.Next != null)
  480. {
  481. Push(new PathBlockState(current.Next));
  482. }
  483. if (current.Branch != null)
  484. {
  485. Push(new PathBlockState(current.Branch));
  486. }
  487. else if (current.GetLastOp() is OpCodeBranchIndir brIndir)
  488. {
  489. // By adding them in descending order (sorted by address), we process the blocks
  490. // in order (of ascending address), since we work with a LIFO.
  491. foreach (Block possibleTarget in brIndir.PossibleTargets.OrderByDescending(x => x.Address))
  492. {
  493. Push(new PathBlockState(possibleTarget));
  494. }
  495. }
  496. else if (current.GetLastOp() is OpCodeBranchPop op)
  497. {
  498. MergeType popMergeType = op.Emitter == InstEmit.Sync ? MergeType.Sync : MergeType.Brk;
  499. bool found = true;
  500. ulong targetAddress = 0UL;
  501. MergeType mergeType;
  502. do
  503. {
  504. if (branchStack.Count == 0)
  505. {
  506. found = false;
  507. break;
  508. }
  509. (targetAddress, mergeType) = branchStack.Pop();
  510. // Push the target address (this will be used to push the address
  511. // back into the SSY/PBK stack when we return from that block),
  512. Push(new PathBlockState(targetAddress, mergeType));
  513. }
  514. while (mergeType != popMergeType);
  515. // Make sure we found the correct address,
  516. // the push and pop instruction types must match, so:
  517. // - BRK can only consume addresses pushed by PBK.
  518. // - SYNC can only consume addresses pushed by SSY.
  519. if (found)
  520. {
  521. if (branchStack.Count == 0)
  522. {
  523. // If the entire stack was consumed, then the current pop instruction
  524. // just consumed the address from our push instruction.
  525. if (op.Targets.TryAdd(pushOp, op.Targets.Count))
  526. {
  527. pushOp.PopOps.Add(op, Local());
  528. target.Predecessors.Add(current);
  529. }
  530. }
  531. else
  532. {
  533. // Push the block itself into the work "queue" (well, it's a stack)
  534. // for processing.
  535. Push(new PathBlockState(blocks[targetAddress]));
  536. }
  537. }
  538. }
  539. }
  540. }
  541. }
  542. }