Decoder.cs 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697
  1. using Ryujinx.Graphics.Shader.Translation;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Linq;
  5. using System.Runtime.CompilerServices;
  6. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  7. namespace Ryujinx.Graphics.Shader.Decoders
  8. {
  9. static class Decoder
  10. {
  11. public static Block[][] Decode(ShaderConfig config, ulong startAddress)
  12. {
  13. List<Block[]> funcs = new List<Block[]>();
  14. Queue<ulong> funcQueue = new Queue<ulong>();
  15. HashSet<ulong> funcVisited = new HashSet<ulong>();
  16. void EnqueueFunction(ulong funcAddress)
  17. {
  18. if (funcVisited.Add(funcAddress))
  19. {
  20. funcQueue.Enqueue(funcAddress);
  21. }
  22. }
  23. funcQueue.Enqueue(0);
  24. while (funcQueue.TryDequeue(out ulong funcAddress))
  25. {
  26. List<Block> blocks = new List<Block>();
  27. Queue<Block> workQueue = new Queue<Block>();
  28. Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();
  29. Block GetBlock(ulong blkAddress)
  30. {
  31. if (!visited.TryGetValue(blkAddress, out Block block))
  32. {
  33. block = new Block(blkAddress);
  34. workQueue.Enqueue(block);
  35. visited.Add(blkAddress, block);
  36. }
  37. return block;
  38. }
  39. GetBlock(funcAddress);
  40. bool hasNewTarget;
  41. do
  42. {
  43. while (workQueue.TryDequeue(out Block currBlock))
  44. {
  45. // Check if the current block is inside another block.
  46. if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
  47. {
  48. Block nBlock = blocks[nBlkIndex];
  49. if (nBlock.Address == currBlock.Address)
  50. {
  51. throw new InvalidOperationException("Found duplicate block address on the list.");
  52. }
  53. nBlock.Split(currBlock);
  54. blocks.Insert(nBlkIndex + 1, currBlock);
  55. continue;
  56. }
  57. // If we have a block after the current one, set the limit address.
  58. ulong limitAddress = ulong.MaxValue;
  59. if (nBlkIndex != blocks.Count)
  60. {
  61. Block nBlock = blocks[nBlkIndex];
  62. int nextIndex = nBlkIndex + 1;
  63. if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
  64. {
  65. limitAddress = blocks[nextIndex].Address;
  66. }
  67. else if (nBlock.Address > currBlock.Address)
  68. {
  69. limitAddress = blocks[nBlkIndex].Address;
  70. }
  71. }
  72. FillBlock(config, currBlock, limitAddress, startAddress);
  73. if (currBlock.OpCodes.Count != 0)
  74. {
  75. // We should have blocks for all possible branch targets,
  76. // including those from SSY/PBK instructions.
  77. foreach (PushOpInfo pushOp in currBlock.PushOpCodes)
  78. {
  79. GetBlock(pushOp.Op.GetAbsoluteAddress());
  80. }
  81. // Set child blocks. "Branch" is the block the branch instruction
  82. // points to (when taken), "Next" is the block at the next address,
  83. // executed when the branch is not taken. For Unconditional Branches
  84. // or end of program, Next is null.
  85. InstOp lastOp = currBlock.GetLastOp();
  86. if (lastOp.Name == InstName.Cal)
  87. {
  88. EnqueueFunction(lastOp.GetAbsoluteAddress());
  89. }
  90. else if (lastOp.Name == InstName.Bra)
  91. {
  92. Block succBlock = GetBlock(lastOp.GetAbsoluteAddress());
  93. currBlock.Successors.Add(succBlock);
  94. succBlock.Predecessors.Add(currBlock);
  95. }
  96. if (!IsUnconditionalBranch(ref lastOp))
  97. {
  98. Block succBlock = GetBlock(currBlock.EndAddress);
  99. currBlock.Successors.Insert(0, succBlock);
  100. succBlock.Predecessors.Add(currBlock);
  101. }
  102. }
  103. // Insert the new block on the list (sorted by address).
  104. if (blocks.Count != 0)
  105. {
  106. Block nBlock = blocks[nBlkIndex];
  107. blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
  108. }
  109. else
  110. {
  111. blocks.Add(currBlock);
  112. }
  113. }
  114. // Propagate SSY/PBK addresses into their uses (SYNC/BRK).
  115. foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
  116. {
  117. for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
  118. {
  119. PropagatePushOp(visited, block, pushOpIndex);
  120. }
  121. }
  122. // Try to find targets for BRX (indirect branch) instructions.
  123. hasNewTarget = FindBrxTargets(config, blocks, GetBlock);
  124. // If we discovered new branch targets from the BRX instruction,
  125. // we need another round of decoding to decode the new blocks.
  126. // Additionally, we may have more SSY/PBK targets to propagate,
  127. // and new BRX instructions.
  128. }
  129. while (hasNewTarget);
  130. funcs.Add(blocks.ToArray());
  131. }
  132. return funcs.ToArray();
  133. }
  134. private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
  135. {
  136. index = 0;
  137. int left = 0;
  138. int right = blocks.Count - 1;
  139. while (left <= right)
  140. {
  141. int size = right - left;
  142. int middle = left + (size >> 1);
  143. Block block = blocks[middle];
  144. index = middle;
  145. if (address >= block.Address && address < block.EndAddress)
  146. {
  147. return true;
  148. }
  149. if (address < block.Address)
  150. {
  151. right = middle - 1;
  152. }
  153. else
  154. {
  155. left = middle + 1;
  156. }
  157. }
  158. return false;
  159. }
  160. private static void FillBlock(ShaderConfig config, Block block, ulong limitAddress, ulong startAddress)
  161. {
  162. IGpuAccessor gpuAccessor = config.GpuAccessor;
  163. ulong address = block.Address;
  164. int bufferOffset = 0;
  165. ReadOnlySpan<ulong> buffer = ReadOnlySpan<ulong>.Empty;
  166. InstOp op = default;
  167. do
  168. {
  169. if (address + 7 >= limitAddress)
  170. {
  171. break;
  172. }
  173. // Ignore scheduling instructions, which are written every 32 bytes.
  174. if ((address & 0x1f) == 0)
  175. {
  176. address += 8;
  177. bufferOffset++;
  178. continue;
  179. }
  180. if (bufferOffset >= buffer.Length)
  181. {
  182. buffer = gpuAccessor.GetCode(startAddress + address, 8);
  183. bufferOffset = 0;
  184. }
  185. ulong opCode = buffer[bufferOffset++];
  186. op = InstTable.GetOp(address, opCode);
  187. if (op.Props.HasFlag(InstProps.TexB))
  188. {
  189. config.SetUsedFeature(FeatureFlags.Bindless);
  190. }
  191. if (op.Name == InstName.Ald || op.Name == InstName.Ast || op.Name == InstName.Ipa)
  192. {
  193. SetUserAttributeUses(config, op.Name, opCode);
  194. }
  195. else if (op.Name == InstName.Ssy || op.Name == InstName.Pbk)
  196. {
  197. block.AddPushOp(op);
  198. }
  199. block.OpCodes.Add(op);
  200. address += 8;
  201. }
  202. while (!op.Props.HasFlag(InstProps.Bra));
  203. block.EndAddress = address;
  204. }
  205. private static void SetUserAttributeUses(ShaderConfig config, InstName name, ulong opCode)
  206. {
  207. int offset;
  208. int count = 1;
  209. bool isStore = false;
  210. bool indexed = false;
  211. if (name == InstName.Ast)
  212. {
  213. InstAst opAst = new InstAst(opCode);
  214. count = (int)opAst.AlSize + 1;
  215. offset = opAst.Imm11;
  216. indexed = opAst.Phys;
  217. isStore = true;
  218. }
  219. else if (name == InstName.Ald)
  220. {
  221. InstAld opAld = new InstAld(opCode);
  222. count = (int)opAld.AlSize + 1;
  223. indexed = opAld.Phys;
  224. offset = opAld.Imm11;
  225. }
  226. else /* if (name == InstName.Ipa) */
  227. {
  228. InstIpa opIpa = new InstIpa(opCode);
  229. offset = opIpa.Imm10;
  230. indexed = opIpa.Idx;
  231. }
  232. if (indexed)
  233. {
  234. if (isStore)
  235. {
  236. config.SetAllOutputUserAttributes();
  237. }
  238. else
  239. {
  240. config.SetAllInputUserAttributes();
  241. }
  242. }
  243. else
  244. {
  245. for (int elemIndex = 0; elemIndex < count; elemIndex++)
  246. {
  247. int attr = offset + elemIndex * 4;
  248. if (attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd)
  249. {
  250. int index = (attr - AttributeConsts.UserAttributeBase) / 16;
  251. if (isStore)
  252. {
  253. config.SetOutputUserAttribute(index);
  254. }
  255. else
  256. {
  257. config.SetInputUserAttribute(index);
  258. }
  259. }
  260. }
  261. }
  262. }
  263. public static bool IsUnconditionalBranch(ref InstOp op)
  264. {
  265. return IsUnconditional(ref op) && op.Props.HasFlag(InstProps.Bra);
  266. }
  267. private static bool IsUnconditional(ref InstOp op)
  268. {
  269. InstConditional condOp = new InstConditional(op.RawOpCode);
  270. if (op.Name == InstName.Exit && condOp.Ccc != Ccc.T)
  271. {
  272. return false;
  273. }
  274. return condOp.Pred == RegisterConsts.PredicateTrueIndex && !condOp.PredInv;
  275. }
  276. private static bool FindBrxTargets(ShaderConfig config, IEnumerable<Block> blocks, Func<ulong, Block> getBlock)
  277. {
  278. bool hasNewTarget = false;
  279. foreach (Block block in blocks)
  280. {
  281. InstOp lastOp = block.GetLastOp();
  282. bool hasNext = block.HasNext();
  283. if (lastOp.Name == InstName.Brx && block.Successors.Count == (hasNext ? 1 : 0))
  284. {
  285. InstBrx opBrx = new InstBrx(lastOp.RawOpCode);
  286. ulong baseOffset = lastOp.GetAbsoluteAddress();
  287. // An indirect branch could go anywhere,
  288. // try to get the possible target offsets from the constant buffer.
  289. (int cbBaseOffset, int cbOffsetsCount) = FindBrxTargetRange(block, opBrx.SrcA);
  290. if (cbOffsetsCount != 0)
  291. {
  292. hasNewTarget = true;
  293. }
  294. for (int i = 0; i < cbOffsetsCount; i++)
  295. {
  296. uint targetOffset = config.GpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4);
  297. Block target = getBlock(baseOffset + targetOffset);
  298. target.Predecessors.Add(block);
  299. block.Successors.Add(target);
  300. }
  301. }
  302. }
  303. return hasNewTarget;
  304. }
  305. private static (int, int) FindBrxTargetRange(Block block, int brxReg)
  306. {
  307. // Try to match the following pattern:
  308. //
  309. // IMNMX.U32 Rx, Rx, UpperBound, PT
  310. // SHL Rx, Rx, 0x2
  311. // LDC Rx, c[0x1][Rx+BaseOffset]
  312. //
  313. // Here, Rx is an arbitrary register, "UpperBound" and "BaseOffset" are constants.
  314. // The above pattern is assumed to be generated by the compiler before BRX,
  315. // as the instruction is usually used to implement jump tables for switch statement optimizations.
  316. // On a successful match, "BaseOffset" is the offset in bytes where the jump offsets are
  317. // located on the constant buffer, and "UpperBound" is the total number of offsets for the BRX, minus 1.
  318. HashSet<Block> visited = new HashSet<Block>();
  319. var ldcLocation = FindFirstRegWrite(visited, new BlockLocation(block, block.OpCodes.Count - 1), brxReg);
  320. if (ldcLocation.Block == null || ldcLocation.Block.OpCodes[ldcLocation.Index].Name != InstName.Ldc)
  321. {
  322. return (0, 0);
  323. }
  324. GetOp<InstLdc>(ldcLocation, out var opLdc);
  325. if (opLdc.CbufSlot != 1 || opLdc.AddressMode != 0)
  326. {
  327. return (0, 0);
  328. }
  329. var shlLocation = FindFirstRegWrite(visited, ldcLocation, opLdc.SrcA);
  330. if (shlLocation.Block == null || !shlLocation.IsImmInst(InstName.Shl))
  331. {
  332. return (0, 0);
  333. }
  334. GetOp<InstShlI>(shlLocation, out var opShl);
  335. if (opShl.Imm20 != 2)
  336. {
  337. return (0, 0);
  338. }
  339. var imnmxLocation = FindFirstRegWrite(visited, shlLocation, opShl.SrcA);
  340. if (imnmxLocation.Block == null || !imnmxLocation.IsImmInst(InstName.Imnmx))
  341. {
  342. return (0, 0);
  343. }
  344. GetOp<InstImnmxI>(imnmxLocation, out var opImnmx);
  345. if (opImnmx.Signed || opImnmx.SrcPred != RegisterConsts.PredicateTrueIndex || opImnmx.SrcPredInv)
  346. {
  347. return (0, 0);
  348. }
  349. return (opLdc.CbufOffset, opImnmx.Imm20 + 1);
  350. }
  351. private static void GetOp<T>(BlockLocation location, out T op) where T : unmanaged
  352. {
  353. ulong rawOp = location.Block.OpCodes[location.Index].RawOpCode;
  354. op = Unsafe.As<ulong, T>(ref rawOp);
  355. }
  356. private struct BlockLocation
  357. {
  358. public Block Block { get; }
  359. public int Index { get; }
  360. public BlockLocation(Block block, int index)
  361. {
  362. Block = block;
  363. Index = index;
  364. }
  365. public bool IsImmInst(InstName name)
  366. {
  367. InstOp op = Block.OpCodes[Index];
  368. return op.Name == name && op.Props.HasFlag(InstProps.Ib);
  369. }
  370. }
  371. private static BlockLocation FindFirstRegWrite(HashSet<Block> visited, BlockLocation location, int regIndex)
  372. {
  373. Queue<BlockLocation> toVisit = new Queue<BlockLocation>();
  374. toVisit.Enqueue(location);
  375. visited.Add(location.Block);
  376. while (toVisit.TryDequeue(out var currentLocation))
  377. {
  378. Block block = currentLocation.Block;
  379. for (int i = currentLocation.Index - 1; i >= 0; i--)
  380. {
  381. if (WritesToRegister(block.OpCodes[i], regIndex))
  382. {
  383. return new BlockLocation(block, i);
  384. }
  385. }
  386. foreach (Block predecessor in block.Predecessors)
  387. {
  388. if (visited.Add(predecessor))
  389. {
  390. toVisit.Enqueue(new BlockLocation(predecessor, predecessor.OpCodes.Count));
  391. }
  392. }
  393. }
  394. return new BlockLocation(null, 0);
  395. }
  396. private static bool WritesToRegister(InstOp op, int regIndex)
  397. {
  398. // Predicate instruction only ever writes to predicate, so we shouldn't check those.
  399. if ((op.Props & (InstProps.Rd | InstProps.Rd2)) == 0)
  400. {
  401. return false;
  402. }
  403. if (op.Props.HasFlag(InstProps.Rd2) && (byte)(op.RawOpCode >> 28) == regIndex)
  404. {
  405. return true;
  406. }
  407. return (byte)op.RawOpCode == regIndex;
  408. }
  409. private enum MergeType
  410. {
  411. Brk = 0,
  412. Sync = 1
  413. }
  414. private struct PathBlockState
  415. {
  416. public Block Block { get; }
  417. private enum RestoreType
  418. {
  419. None,
  420. PopPushOp,
  421. PushBranchOp
  422. }
  423. private RestoreType _restoreType;
  424. private ulong _restoreValue;
  425. private MergeType _restoreMergeType;
  426. public bool ReturningFromVisit => _restoreType != RestoreType.None;
  427. public PathBlockState(Block block)
  428. {
  429. Block = block;
  430. _restoreType = RestoreType.None;
  431. _restoreValue = 0;
  432. _restoreMergeType = default;
  433. }
  434. public PathBlockState(int oldStackSize)
  435. {
  436. Block = null;
  437. _restoreType = RestoreType.PopPushOp;
  438. _restoreValue = (ulong)oldStackSize;
  439. _restoreMergeType = default;
  440. }
  441. public PathBlockState(ulong syncAddress, MergeType mergeType)
  442. {
  443. Block = null;
  444. _restoreType = RestoreType.PushBranchOp;
  445. _restoreValue = syncAddress;
  446. _restoreMergeType = mergeType;
  447. }
  448. public void RestoreStackState(Stack<(ulong, MergeType)> branchStack)
  449. {
  450. if (_restoreType == RestoreType.PushBranchOp)
  451. {
  452. branchStack.Push((_restoreValue, _restoreMergeType));
  453. }
  454. else if (_restoreType == RestoreType.PopPushOp)
  455. {
  456. while (branchStack.Count > (uint)_restoreValue)
  457. {
  458. branchStack.Pop();
  459. }
  460. }
  461. }
  462. }
  463. private static void PropagatePushOp(Dictionary<ulong, Block> blocks, Block currBlock, int pushOpIndex)
  464. {
  465. PushOpInfo pushOpInfo = currBlock.PushOpCodes[pushOpIndex];
  466. InstOp pushOp = pushOpInfo.Op;
  467. Block target = blocks[pushOp.GetAbsoluteAddress()];
  468. Stack<PathBlockState> workQueue = new Stack<PathBlockState>();
  469. HashSet<Block> visited = new HashSet<Block>();
  470. Stack<(ulong, MergeType)> branchStack = new Stack<(ulong, MergeType)>();
  471. void Push(PathBlockState pbs)
  472. {
  473. // When block is null, this means we are pushing a restore operation.
  474. // Restore operations are used to undo the work done inside a block
  475. // when we return from it, for example it pops addresses pushed by
  476. // SSY/PBK instructions inside the block, and pushes addresses poped
  477. // by SYNC/BRK.
  478. // For blocks, if it's already visited, we just ignore to avoid going
  479. // around in circles and getting stuck here.
  480. if (pbs.Block == null || !visited.Contains(pbs.Block))
  481. {
  482. workQueue.Push(pbs);
  483. }
  484. }
  485. Push(new PathBlockState(currBlock));
  486. while (workQueue.TryPop(out PathBlockState pbs))
  487. {
  488. if (pbs.ReturningFromVisit)
  489. {
  490. pbs.RestoreStackState(branchStack);
  491. continue;
  492. }
  493. Block current = pbs.Block;
  494. // If the block was already processed, we just ignore it, otherwise
  495. // we would push the same child blocks of an already processed block,
  496. // and go around in circles until memory is exhausted.
  497. if (!visited.Add(current))
  498. {
  499. continue;
  500. }
  501. int pushOpsCount = current.PushOpCodes.Count;
  502. if (pushOpsCount != 0)
  503. {
  504. Push(new PathBlockState(branchStack.Count));
  505. for (int index = pushOpIndex; index < pushOpsCount; index++)
  506. {
  507. InstOp currentPushOp = current.PushOpCodes[index].Op;
  508. MergeType pushMergeType = currentPushOp.Name == InstName.Ssy ? MergeType.Sync : MergeType.Brk;
  509. branchStack.Push((currentPushOp.GetAbsoluteAddress(), pushMergeType));
  510. }
  511. }
  512. pushOpIndex = 0;
  513. bool hasNext = current.HasNext();
  514. if (hasNext)
  515. {
  516. Push(new PathBlockState(current.Successors[0]));
  517. }
  518. InstOp lastOp = current.GetLastOp();
  519. if (lastOp.Name == InstName.Sync || lastOp.Name == InstName.Brk)
  520. {
  521. MergeType popMergeType = lastOp.Name == InstName.Sync ? MergeType.Sync : MergeType.Brk;
  522. bool found = true;
  523. ulong targetAddress = 0UL;
  524. MergeType mergeType;
  525. do
  526. {
  527. if (branchStack.Count == 0)
  528. {
  529. found = false;
  530. break;
  531. }
  532. (targetAddress, mergeType) = branchStack.Pop();
  533. // Push the target address (this will be used to push the address
  534. // back into the SSY/PBK stack when we return from that block),
  535. Push(new PathBlockState(targetAddress, mergeType));
  536. }
  537. while (mergeType != popMergeType);
  538. // Make sure we found the correct address,
  539. // the push and pop instruction types must match, so:
  540. // - BRK can only consume addresses pushed by PBK.
  541. // - SYNC can only consume addresses pushed by SSY.
  542. if (found)
  543. {
  544. if (branchStack.Count == 0)
  545. {
  546. // If the entire stack was consumed, then the current pop instruction
  547. // just consumed the address from our push instruction.
  548. if (current.SyncTargets.TryAdd(pushOp.Address, new SyncTarget(pushOpInfo, current.SyncTargets.Count)))
  549. {
  550. pushOpInfo.Consumers.Add(current, Local());
  551. target.Predecessors.Add(current);
  552. current.Successors.Add(target);
  553. }
  554. }
  555. else
  556. {
  557. // Push the block itself into the work queue for processing.
  558. Push(new PathBlockState(blocks[targetAddress]));
  559. }
  560. }
  561. }
  562. else
  563. {
  564. // By adding them in descending order (sorted by address), we process the blocks
  565. // in order (of ascending address), since we work with a LIFO.
  566. foreach (Block possibleTarget in current.Successors.OrderByDescending(x => x.Address))
  567. {
  568. if (!hasNext || possibleTarget != current.Successors[0])
  569. {
  570. Push(new PathBlockState(possibleTarget));
  571. }
  572. }
  573. }
  574. }
  575. }
  576. }
  577. }