Decoder.cs 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701
  1. using Ryujinx.Graphics.Shader.Translation;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Linq;
  5. using System.Runtime.CompilerServices;
  6. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  7. namespace Ryujinx.Graphics.Shader.Decoders
  8. {
  9. static class Decoder
  10. {
  11. public static Block[][] Decode(ShaderConfig config, ulong startAddress)
  12. {
  13. List<Block[]> funcs = new List<Block[]>();
  14. Queue<ulong> funcQueue = new Queue<ulong>();
  15. HashSet<ulong> funcVisited = new HashSet<ulong>();
  16. void EnqueueFunction(ulong funcAddress)
  17. {
  18. if (funcVisited.Add(funcAddress))
  19. {
  20. funcQueue.Enqueue(funcAddress);
  21. }
  22. }
  23. funcQueue.Enqueue(0);
  24. while (funcQueue.TryDequeue(out ulong funcAddress))
  25. {
  26. List<Block> blocks = new List<Block>();
  27. Queue<Block> workQueue = new Queue<Block>();
  28. Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();
  29. Block GetBlock(ulong blkAddress)
  30. {
  31. if (!visited.TryGetValue(blkAddress, out Block block))
  32. {
  33. block = new Block(blkAddress);
  34. workQueue.Enqueue(block);
  35. visited.Add(blkAddress, block);
  36. }
  37. return block;
  38. }
  39. GetBlock(funcAddress);
  40. bool hasNewTarget;
  41. do
  42. {
  43. while (workQueue.TryDequeue(out Block currBlock))
  44. {
  45. // Check if the current block is inside another block.
  46. if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
  47. {
  48. Block nBlock = blocks[nBlkIndex];
  49. if (nBlock.Address == currBlock.Address)
  50. {
  51. throw new InvalidOperationException("Found duplicate block address on the list.");
  52. }
  53. nBlock.Split(currBlock);
  54. blocks.Insert(nBlkIndex + 1, currBlock);
  55. continue;
  56. }
  57. // If we have a block after the current one, set the limit address.
  58. ulong limitAddress = ulong.MaxValue;
  59. if (nBlkIndex != blocks.Count)
  60. {
  61. Block nBlock = blocks[nBlkIndex];
  62. int nextIndex = nBlkIndex + 1;
  63. if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
  64. {
  65. limitAddress = blocks[nextIndex].Address;
  66. }
  67. else if (nBlock.Address > currBlock.Address)
  68. {
  69. limitAddress = blocks[nBlkIndex].Address;
  70. }
  71. }
  72. FillBlock(config, currBlock, limitAddress, startAddress);
  73. if (currBlock.OpCodes.Count != 0)
  74. {
  75. // We should have blocks for all possible branch targets,
  76. // including those from SSY/PBK instructions.
  77. foreach (PushOpInfo pushOp in currBlock.PushOpCodes)
  78. {
  79. GetBlock(pushOp.Op.GetAbsoluteAddress());
  80. }
  81. // Set child blocks. "Branch" is the block the branch instruction
  82. // points to (when taken), "Next" is the block at the next address,
  83. // executed when the branch is not taken. For Unconditional Branches
  84. // or end of program, Next is null.
  85. InstOp lastOp = currBlock.GetLastOp();
  86. if (lastOp.Name == InstName.Cal)
  87. {
  88. EnqueueFunction(lastOp.GetAbsoluteAddress());
  89. }
  90. else if (lastOp.Name == InstName.Bra)
  91. {
  92. Block succBlock = GetBlock(lastOp.GetAbsoluteAddress());
  93. currBlock.Successors.Add(succBlock);
  94. succBlock.Predecessors.Add(currBlock);
  95. }
  96. if (!IsUnconditionalBranch(ref lastOp))
  97. {
  98. Block succBlock = GetBlock(currBlock.EndAddress);
  99. currBlock.Successors.Insert(0, succBlock);
  100. succBlock.Predecessors.Add(currBlock);
  101. }
  102. }
  103. // Insert the new block on the list (sorted by address).
  104. if (blocks.Count != 0)
  105. {
  106. Block nBlock = blocks[nBlkIndex];
  107. blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
  108. }
  109. else
  110. {
  111. blocks.Add(currBlock);
  112. }
  113. }
  114. // Propagate SSY/PBK addresses into their uses (SYNC/BRK).
  115. foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
  116. {
  117. for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
  118. {
  119. PropagatePushOp(visited, block, pushOpIndex);
  120. }
  121. }
  122. // Try to find targets for BRX (indirect branch) instructions.
  123. hasNewTarget = FindBrxTargets(config, blocks, GetBlock);
  124. // If we discovered new branch targets from the BRX instruction,
  125. // we need another round of decoding to decode the new blocks.
  126. // Additionally, we may have more SSY/PBK targets to propagate,
  127. // and new BRX instructions.
  128. }
  129. while (hasNewTarget);
  130. funcs.Add(blocks.ToArray());
  131. }
  132. return funcs.ToArray();
  133. }
  134. private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
  135. {
  136. index = 0;
  137. int left = 0;
  138. int right = blocks.Count - 1;
  139. while (left <= right)
  140. {
  141. int size = right - left;
  142. int middle = left + (size >> 1);
  143. Block block = blocks[middle];
  144. index = middle;
  145. if (address >= block.Address && address < block.EndAddress)
  146. {
  147. return true;
  148. }
  149. if (address < block.Address)
  150. {
  151. right = middle - 1;
  152. }
  153. else
  154. {
  155. left = middle + 1;
  156. }
  157. }
  158. return false;
  159. }
  160. private static void FillBlock(ShaderConfig config, Block block, ulong limitAddress, ulong startAddress)
  161. {
  162. IGpuAccessor gpuAccessor = config.GpuAccessor;
  163. ulong address = block.Address;
  164. int bufferOffset = 0;
  165. ReadOnlySpan<ulong> buffer = ReadOnlySpan<ulong>.Empty;
  166. InstOp op = default;
  167. do
  168. {
  169. if (address + 7 >= limitAddress)
  170. {
  171. break;
  172. }
  173. // Ignore scheduling instructions, which are written every 32 bytes.
  174. if ((address & 0x1f) == 0)
  175. {
  176. address += 8;
  177. bufferOffset++;
  178. continue;
  179. }
  180. if (bufferOffset >= buffer.Length)
  181. {
  182. buffer = gpuAccessor.GetCode(startAddress + address, 8);
  183. bufferOffset = 0;
  184. }
  185. ulong opCode = buffer[bufferOffset++];
  186. op = InstTable.GetOp(address, opCode);
  187. if (op.Props.HasFlag(InstProps.TexB))
  188. {
  189. config.SetUsedFeature(FeatureFlags.Bindless);
  190. }
  191. if (op.Name == InstName.Ald || op.Name == InstName.Ast || op.Name == InstName.Ipa)
  192. {
  193. SetUserAttributeUses(config, op.Name, opCode);
  194. }
  195. else if (op.Name == InstName.Ssy || op.Name == InstName.Pbk)
  196. {
  197. block.AddPushOp(op);
  198. }
  199. block.OpCodes.Add(op);
  200. address += 8;
  201. }
  202. while (!op.Props.HasFlag(InstProps.Bra));
  203. block.EndAddress = address;
  204. }
  205. private static void SetUserAttributeUses(ShaderConfig config, InstName name, ulong opCode)
  206. {
  207. int offset;
  208. int count = 1;
  209. bool isStore = false;
  210. bool indexed = false;
  211. bool perPatch = false;
  212. if (name == InstName.Ast)
  213. {
  214. InstAst opAst = new InstAst(opCode);
  215. count = (int)opAst.AlSize + 1;
  216. offset = opAst.Imm11;
  217. indexed = opAst.Phys;
  218. perPatch = opAst.P;
  219. isStore = true;
  220. }
  221. else if (name == InstName.Ald)
  222. {
  223. InstAld opAld = new InstAld(opCode);
  224. count = (int)opAld.AlSize + 1;
  225. offset = opAld.Imm11;
  226. indexed = opAld.Phys;
  227. perPatch = opAld.P;
  228. isStore = opAld.O;
  229. }
  230. else /* if (name == InstName.Ipa) */
  231. {
  232. InstIpa opIpa = new InstIpa(opCode);
  233. offset = opIpa.Imm10;
  234. indexed = opIpa.Idx;
  235. }
  236. if (indexed)
  237. {
  238. if (isStore)
  239. {
  240. config.SetAllOutputUserAttributes();
  241. }
  242. else
  243. {
  244. config.SetAllInputUserAttributes();
  245. }
  246. }
  247. else
  248. {
  249. for (int elemIndex = 0; elemIndex < count; elemIndex++)
  250. {
  251. int attr = offset + elemIndex * 4;
  252. if (attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd)
  253. {
  254. int index = (attr - AttributeConsts.UserAttributeBase) / 16;
  255. if (isStore)
  256. {
  257. config.SetOutputUserAttribute(index, perPatch);
  258. }
  259. else
  260. {
  261. config.SetInputUserAttribute(index, perPatch);
  262. }
  263. }
  264. }
  265. }
  266. }
  267. public static bool IsUnconditionalBranch(ref InstOp op)
  268. {
  269. return IsUnconditional(ref op) && op.Props.HasFlag(InstProps.Bra);
  270. }
  271. private static bool IsUnconditional(ref InstOp op)
  272. {
  273. InstConditional condOp = new InstConditional(op.RawOpCode);
  274. if (op.Name == InstName.Exit && condOp.Ccc != Ccc.T)
  275. {
  276. return false;
  277. }
  278. return condOp.Pred == RegisterConsts.PredicateTrueIndex && !condOp.PredInv;
  279. }
  280. private static bool FindBrxTargets(ShaderConfig config, IEnumerable<Block> blocks, Func<ulong, Block> getBlock)
  281. {
  282. bool hasNewTarget = false;
  283. foreach (Block block in blocks)
  284. {
  285. InstOp lastOp = block.GetLastOp();
  286. bool hasNext = block.HasNext();
  287. if (lastOp.Name == InstName.Brx && block.Successors.Count == (hasNext ? 1 : 0))
  288. {
  289. InstBrx opBrx = new InstBrx(lastOp.RawOpCode);
  290. ulong baseOffset = lastOp.GetAbsoluteAddress();
  291. // An indirect branch could go anywhere,
  292. // try to get the possible target offsets from the constant buffer.
  293. (int cbBaseOffset, int cbOffsetsCount) = FindBrxTargetRange(block, opBrx.SrcA);
  294. if (cbOffsetsCount != 0)
  295. {
  296. hasNewTarget = true;
  297. }
  298. for (int i = 0; i < cbOffsetsCount; i++)
  299. {
  300. uint targetOffset = config.GpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4);
  301. Block target = getBlock(baseOffset + targetOffset);
  302. target.Predecessors.Add(block);
  303. block.Successors.Add(target);
  304. }
  305. }
  306. }
  307. return hasNewTarget;
  308. }
  309. private static (int, int) FindBrxTargetRange(Block block, int brxReg)
  310. {
  311. // Try to match the following pattern:
  312. //
  313. // IMNMX.U32 Rx, Rx, UpperBound, PT
  314. // SHL Rx, Rx, 0x2
  315. // LDC Rx, c[0x1][Rx+BaseOffset]
  316. //
  317. // Here, Rx is an arbitrary register, "UpperBound" and "BaseOffset" are constants.
  318. // The above pattern is assumed to be generated by the compiler before BRX,
  319. // as the instruction is usually used to implement jump tables for switch statement optimizations.
  320. // On a successful match, "BaseOffset" is the offset in bytes where the jump offsets are
  321. // located on the constant buffer, and "UpperBound" is the total number of offsets for the BRX, minus 1.
  322. HashSet<Block> visited = new HashSet<Block>();
  323. var ldcLocation = FindFirstRegWrite(visited, new BlockLocation(block, block.OpCodes.Count - 1), brxReg);
  324. if (ldcLocation.Block == null || ldcLocation.Block.OpCodes[ldcLocation.Index].Name != InstName.Ldc)
  325. {
  326. return (0, 0);
  327. }
  328. GetOp<InstLdc>(ldcLocation, out var opLdc);
  329. if (opLdc.CbufSlot != 1 || opLdc.AddressMode != 0)
  330. {
  331. return (0, 0);
  332. }
  333. var shlLocation = FindFirstRegWrite(visited, ldcLocation, opLdc.SrcA);
  334. if (shlLocation.Block == null || !shlLocation.IsImmInst(InstName.Shl))
  335. {
  336. return (0, 0);
  337. }
  338. GetOp<InstShlI>(shlLocation, out var opShl);
  339. if (opShl.Imm20 != 2)
  340. {
  341. return (0, 0);
  342. }
  343. var imnmxLocation = FindFirstRegWrite(visited, shlLocation, opShl.SrcA);
  344. if (imnmxLocation.Block == null || !imnmxLocation.IsImmInst(InstName.Imnmx))
  345. {
  346. return (0, 0);
  347. }
  348. GetOp<InstImnmxI>(imnmxLocation, out var opImnmx);
  349. if (opImnmx.Signed || opImnmx.SrcPred != RegisterConsts.PredicateTrueIndex || opImnmx.SrcPredInv)
  350. {
  351. return (0, 0);
  352. }
  353. return (opLdc.CbufOffset, opImnmx.Imm20 + 1);
  354. }
  355. private static void GetOp<T>(BlockLocation location, out T op) where T : unmanaged
  356. {
  357. ulong rawOp = location.Block.OpCodes[location.Index].RawOpCode;
  358. op = Unsafe.As<ulong, T>(ref rawOp);
  359. }
  360. private struct BlockLocation
  361. {
  362. public Block Block { get; }
  363. public int Index { get; }
  364. public BlockLocation(Block block, int index)
  365. {
  366. Block = block;
  367. Index = index;
  368. }
  369. public bool IsImmInst(InstName name)
  370. {
  371. InstOp op = Block.OpCodes[Index];
  372. return op.Name == name && op.Props.HasFlag(InstProps.Ib);
  373. }
  374. }
  375. private static BlockLocation FindFirstRegWrite(HashSet<Block> visited, BlockLocation location, int regIndex)
  376. {
  377. Queue<BlockLocation> toVisit = new Queue<BlockLocation>();
  378. toVisit.Enqueue(location);
  379. visited.Add(location.Block);
  380. while (toVisit.TryDequeue(out var currentLocation))
  381. {
  382. Block block = currentLocation.Block;
  383. for (int i = currentLocation.Index - 1; i >= 0; i--)
  384. {
  385. if (WritesToRegister(block.OpCodes[i], regIndex))
  386. {
  387. return new BlockLocation(block, i);
  388. }
  389. }
  390. foreach (Block predecessor in block.Predecessors)
  391. {
  392. if (visited.Add(predecessor))
  393. {
  394. toVisit.Enqueue(new BlockLocation(predecessor, predecessor.OpCodes.Count));
  395. }
  396. }
  397. }
  398. return new BlockLocation(null, 0);
  399. }
  400. private static bool WritesToRegister(InstOp op, int regIndex)
  401. {
  402. // Predicate instruction only ever writes to predicate, so we shouldn't check those.
  403. if ((op.Props & (InstProps.Rd | InstProps.Rd2)) == 0)
  404. {
  405. return false;
  406. }
  407. if (op.Props.HasFlag(InstProps.Rd2) && (byte)(op.RawOpCode >> 28) == regIndex)
  408. {
  409. return true;
  410. }
  411. return (byte)op.RawOpCode == regIndex;
  412. }
  413. private enum MergeType
  414. {
  415. Brk = 0,
  416. Sync = 1
  417. }
  418. private struct PathBlockState
  419. {
  420. public Block Block { get; }
  421. private enum RestoreType
  422. {
  423. None,
  424. PopPushOp,
  425. PushBranchOp
  426. }
  427. private RestoreType _restoreType;
  428. private ulong _restoreValue;
  429. private MergeType _restoreMergeType;
  430. public bool ReturningFromVisit => _restoreType != RestoreType.None;
  431. public PathBlockState(Block block)
  432. {
  433. Block = block;
  434. _restoreType = RestoreType.None;
  435. _restoreValue = 0;
  436. _restoreMergeType = default;
  437. }
  438. public PathBlockState(int oldStackSize)
  439. {
  440. Block = null;
  441. _restoreType = RestoreType.PopPushOp;
  442. _restoreValue = (ulong)oldStackSize;
  443. _restoreMergeType = default;
  444. }
  445. public PathBlockState(ulong syncAddress, MergeType mergeType)
  446. {
  447. Block = null;
  448. _restoreType = RestoreType.PushBranchOp;
  449. _restoreValue = syncAddress;
  450. _restoreMergeType = mergeType;
  451. }
  452. public void RestoreStackState(Stack<(ulong, MergeType)> branchStack)
  453. {
  454. if (_restoreType == RestoreType.PushBranchOp)
  455. {
  456. branchStack.Push((_restoreValue, _restoreMergeType));
  457. }
  458. else if (_restoreType == RestoreType.PopPushOp)
  459. {
  460. while (branchStack.Count > (uint)_restoreValue)
  461. {
  462. branchStack.Pop();
  463. }
  464. }
  465. }
  466. }
  467. private static void PropagatePushOp(Dictionary<ulong, Block> blocks, Block currBlock, int pushOpIndex)
  468. {
  469. PushOpInfo pushOpInfo = currBlock.PushOpCodes[pushOpIndex];
  470. InstOp pushOp = pushOpInfo.Op;
  471. Block target = blocks[pushOp.GetAbsoluteAddress()];
  472. Stack<PathBlockState> workQueue = new Stack<PathBlockState>();
  473. HashSet<Block> visited = new HashSet<Block>();
  474. Stack<(ulong, MergeType)> branchStack = new Stack<(ulong, MergeType)>();
  475. void Push(PathBlockState pbs)
  476. {
  477. // When block is null, this means we are pushing a restore operation.
  478. // Restore operations are used to undo the work done inside a block
  479. // when we return from it, for example it pops addresses pushed by
  480. // SSY/PBK instructions inside the block, and pushes addresses poped
  481. // by SYNC/BRK.
  482. // For blocks, if it's already visited, we just ignore to avoid going
  483. // around in circles and getting stuck here.
  484. if (pbs.Block == null || !visited.Contains(pbs.Block))
  485. {
  486. workQueue.Push(pbs);
  487. }
  488. }
  489. Push(new PathBlockState(currBlock));
  490. while (workQueue.TryPop(out PathBlockState pbs))
  491. {
  492. if (pbs.ReturningFromVisit)
  493. {
  494. pbs.RestoreStackState(branchStack);
  495. continue;
  496. }
  497. Block current = pbs.Block;
  498. // If the block was already processed, we just ignore it, otherwise
  499. // we would push the same child blocks of an already processed block,
  500. // and go around in circles until memory is exhausted.
  501. if (!visited.Add(current))
  502. {
  503. continue;
  504. }
  505. int pushOpsCount = current.PushOpCodes.Count;
  506. if (pushOpsCount != 0)
  507. {
  508. Push(new PathBlockState(branchStack.Count));
  509. for (int index = pushOpIndex; index < pushOpsCount; index++)
  510. {
  511. InstOp currentPushOp = current.PushOpCodes[index].Op;
  512. MergeType pushMergeType = currentPushOp.Name == InstName.Ssy ? MergeType.Sync : MergeType.Brk;
  513. branchStack.Push((currentPushOp.GetAbsoluteAddress(), pushMergeType));
  514. }
  515. }
  516. pushOpIndex = 0;
  517. bool hasNext = current.HasNext();
  518. if (hasNext)
  519. {
  520. Push(new PathBlockState(current.Successors[0]));
  521. }
  522. InstOp lastOp = current.GetLastOp();
  523. if (lastOp.Name == InstName.Sync || lastOp.Name == InstName.Brk)
  524. {
  525. MergeType popMergeType = lastOp.Name == InstName.Sync ? MergeType.Sync : MergeType.Brk;
  526. bool found = true;
  527. ulong targetAddress = 0UL;
  528. MergeType mergeType;
  529. do
  530. {
  531. if (branchStack.Count == 0)
  532. {
  533. found = false;
  534. break;
  535. }
  536. (targetAddress, mergeType) = branchStack.Pop();
  537. // Push the target address (this will be used to push the address
  538. // back into the SSY/PBK stack when we return from that block),
  539. Push(new PathBlockState(targetAddress, mergeType));
  540. }
  541. while (mergeType != popMergeType);
  542. // Make sure we found the correct address,
  543. // the push and pop instruction types must match, so:
  544. // - BRK can only consume addresses pushed by PBK.
  545. // - SYNC can only consume addresses pushed by SSY.
  546. if (found)
  547. {
  548. if (branchStack.Count == 0)
  549. {
  550. // If the entire stack was consumed, then the current pop instruction
  551. // just consumed the address from our push instruction.
  552. if (current.SyncTargets.TryAdd(pushOp.Address, new SyncTarget(pushOpInfo, current.SyncTargets.Count)))
  553. {
  554. pushOpInfo.Consumers.Add(current, Local());
  555. target.Predecessors.Add(current);
  556. current.Successors.Add(target);
  557. }
  558. }
  559. else
  560. {
  561. // Push the block itself into the work queue for processing.
  562. Push(new PathBlockState(blocks[targetAddress]));
  563. }
  564. }
  565. }
  566. else
  567. {
  568. // By adding them in descending order (sorted by address), we process the blocks
  569. // in order (of ascending address), since we work with a LIFO.
  570. foreach (Block possibleTarget in current.Successors.OrderByDescending(x => x.Address))
  571. {
  572. if (!hasNext || possibleTarget != current.Successors[0])
  573. {
  574. Push(new PathBlockState(possibleTarget));
  575. }
  576. }
  577. }
  578. }
  579. }
  580. }
  581. }