Decoder.cs 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905
  1. using Ryujinx.Graphics.Shader.Translation;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Linq;
  5. using System.Runtime.CompilerServices;
  6. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  7. namespace Ryujinx.Graphics.Shader.Decoders
  8. {
  9. static class Decoder
  10. {
  /// <summary>
  /// State accumulated while a program is being decoded: attribute usage, the set of
  /// used features, the clip distances written and the observed size of constant buffer 1.
  /// </summary>
  private class Context
  {
      private readonly IGpuAccessor _gpuAccessor;

      public AttributeUsage AttributeUsage { get; }

      public FeatureFlags UsedFeatures { get; private set; }

      public byte ClipDistancesWritten { get; private set; }

      public int Cb1DataSize { get; private set; }

      /// <summary>
      /// Creates a new decoding context.
      /// </summary>
      /// <param name="gpuAccessor">Accessor used for constant buffer 1 reads and attribute usage tracking</param>
      public Context(IGpuAccessor gpuAccessor)
      {
          _gpuAccessor = gpuAccessor;
          AttributeUsage = new AttributeUsage(gpuAccessor);
      }

      /// <summary>
      /// Reads a 32-bit value from constant buffer 1 and grows <see cref="Cb1DataSize"/>
      /// so that it covers the 4 bytes that were read.
      /// </summary>
      /// <param name="offset">Offset passed to the accessor; the covered range ends at offset + 4</param>
      /// <returns>The value returned by the GPU accessor for that offset</returns>
      public uint ConstantBuffer1Read(int offset)
      {
          int requiredSize = offset + 4;

          if (requiredSize > Cb1DataSize)
          {
              Cb1DataSize = requiredSize;
          }

          return _gpuAccessor.ConstantBuffer1Read(offset);
      }

      /// <summary>
      /// Adds the given flags to <see cref="UsedFeatures"/>.
      /// </summary>
      /// <param name="flags">Flags to be OR-ed into the current set</param>
      public void SetUsedFeature(FeatureFlags flags) => UsedFeatures |= flags;

      /// <summary>
      /// Sets the bit for the given clip distance on <see cref="ClipDistancesWritten"/>.
      /// </summary>
      /// <param name="index">Bit position to set</param>
      public void SetClipDistanceWritten(int index) => ClipDistancesWritten |= (byte)(1 << index);
  }
  /// <summary>
  /// Decodes a shader program into functions made of basic blocks.
  /// </summary>
  /// <remarks>
  /// Decoding starts with the function at address 0. Each function whose address is the target
  /// of a CAL that ends a block is queued and decoded as well. For every function, blocks are
  /// decoded until no work is left, then push operations are propagated and BRX targets are searched;
  /// the whole process repeats for as long as the BRX search reports new targets.
  /// </remarks>
  /// <param name="definitions">Shader definitions, forwarded to the block decoder</param>
  /// <param name="gpuAccessor">Accessor used to fetch the shader code and constant buffer 1 data</param>
  /// <param name="startAddress">Base address that block addresses are added to when fetching code</param>
  /// <returns>The decoded program, together with the usage information collected while decoding</returns>
  /// <exception cref="InvalidOperationException">A queued block starts at the address of an already decoded block</exception>
  public static DecodedProgram Decode(ShaderDefinitions definitions, IGpuAccessor gpuAccessor, ulong startAddress)
  {
      Context context = new(gpuAccessor);

      Queue<DecodedFunction> pendingFunctions = new();
      Dictionary<ulong, DecodedFunction> functionsVisited = new();

      // Returns the function for the address, creating and queueing it on first use.
      DecodedFunction EnqueueFunction(ulong address)
      {
          if (functionsVisited.TryGetValue(address, out DecodedFunction known))
          {
              return known;
          }

          DecodedFunction created = new(address);

          functionsVisited.Add(address, created);
          pendingFunctions.Enqueue(created);

          return created;
      }

      DecodedFunction mainFunction = EnqueueFunction(0);

      while (pendingFunctions.TryDequeue(out DecodedFunction currentFunction))
      {
          List<Block> blocks = new();
          Queue<Block> workQueue = new();
          Dictionary<ulong, Block> visited = new();

          // Returns the block for the address, creating and queueing it on first use.
          Block GetBlock(ulong blkAddress)
          {
              if (visited.TryGetValue(blkAddress, out Block existing))
              {
                  return existing;
              }

              Block created = new(blkAddress);

              workQueue.Enqueue(created);
              visited.Add(blkAddress, created);

              return created;
          }

          GetBlock(currentFunction.Address);

          bool hasNewTarget;

          do
          {
              while (workQueue.TryDequeue(out Block currBlock))
              {
                  // A block that starts inside an existing one splits the existing block in two.
                  if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
                  {
                      Block container = blocks[nBlkIndex];

                      if (container.Address == currBlock.Address)
                      {
                          throw new InvalidOperationException("Found duplicate block address on the list.");
                      }

                      container.Split(currBlock);
                      blocks.Insert(nBlkIndex + 1, currBlock);

                      continue;
                  }

                  // Decoding must stop at the start of the block that follows the current one, if any.
                  ulong limitAddress = ulong.MaxValue;

                  if (nBlkIndex != blocks.Count)
                  {
                      Block nearest = blocks[nBlkIndex];
                      int followingIndex = nBlkIndex + 1;

                      if (nearest.Address < currBlock.Address && followingIndex < blocks.Count)
                      {
                          limitAddress = blocks[followingIndex].Address;
                      }
                      else if (nearest.Address > currBlock.Address)
                      {
                          limitAddress = nearest.Address;
                      }
                  }

                  FillBlock(definitions, gpuAccessor, context, currBlock, limitAddress, startAddress);

                  if (currBlock.OpCodes.Count != 0)
                  {
                      // Every possible branch target needs a block,
                      // and that includes the targets of PBK/PCNT/SSY instructions.
                      foreach (PushOpInfo pushOp in currBlock.PushOpCodes)
                      {
                          GetBlock(pushOp.Op.GetAbsoluteAddress());
                      }

                      // Link the block to its children. The branch target (when the branch is taken)
                      // is appended to the successors, while the block at the next address
                      // (branch not taken) is always placed first. Unconditional branches
                      // get no block at the next address.
                      InstOp lastOp = currBlock.GetLastOp();

                      switch (lastOp.Name)
                      {
                          case InstName.Cal:
                          {
                              EnqueueFunction(lastOp.GetAbsoluteAddress()).AddCaller(currentFunction);
                              break;
                          }
                          case InstName.Bra:
                          {
                              Block branchTarget = GetBlock(lastOp.GetAbsoluteAddress());

                              currBlock.Successors.Add(branchTarget);
                              branchTarget.Predecessors.Add(currBlock);
                              break;
                          }
                      }

                      if (!IsUnconditionalBranch(ref lastOp))
                      {
                          Block fallThrough = GetBlock(currBlock.EndAddress);

                          currBlock.Successors.Insert(0, fallThrough);
                          fallThrough.Predecessors.Add(currBlock);
                      }
                  }

                  // Keep the list sorted by address when adding the new block.
                  if (blocks.Count == 0)
                  {
                      blocks.Add(currBlock);
                  }
                  else
                  {
                      Block neighbour = blocks[nBlkIndex];
                      int insertIndex = nBlkIndex + (neighbour.Address < currBlock.Address ? 1 : 0);

                      blocks.Insert(insertIndex, currBlock);
                  }
              }

              // Propagate SSY/PBK addresses into their uses (SYNC/BRK).
              // Blocks without push operations simply run zero iterations of the inner loop.
              foreach (Block block in blocks)
              {
                  for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
                  {
                      PropagatePushOp(visited, block, pushOpIndex);
                  }
              }

              // Look for the targets of BRX (indirect branch) instructions.
              // New targets mean new blocks on the work queue, so another decoding round is needed;
              // that round may also bring more SSY/PBK targets to propagate and more BRX instructions.
              hasNewTarget = FindBrxTargets(context, blocks, GetBlock);
          }
          while (hasNewTarget);

          currentFunction.SetBlocks(blocks.ToArray());
      }

      return new DecodedProgram(
          mainFunction,
          functionsVisited,
          context.AttributeUsage,
          context.UsedFeatures,
          context.ClipDistancesWritten,
          context.Cb1DataSize);
  }
  /// <summary>
  /// Searches the address-sorted block list for the block whose range contains the given address.
  /// </summary>
  /// <param name="blocks">Blocks to search</param>
  /// <param name="address">Address to look for</param>
  /// <param name="index">Index of the last block that was examined, or 0 when the list is empty</param>
  /// <returns>True if the address is at or after a block's address and before its end address, false otherwise</returns>
  private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
  {
      index = 0;

      int low = 0;
      int high = blocks.Count - 1;

      while (low <= high)
      {
          int middle = low + ((high - low) >> 1);
          Block candidate = blocks[middle];

          index = middle;

          if (address < candidate.Address)
          {
              high = middle - 1;
          }
          else if (address < candidate.EndAddress)
          {
              // Address is at or after the block start, and before the block end.
              return true;
          }
          else
          {
              low = middle + 1;
          }
      }

      return false;
  }
  /// <summary>
  /// Decodes instructions into the block, starting at the block address, until an instruction
  /// with the branch property is added or the limit address is reached.
  /// Feature and attribute usage found along the way is recorded on the context.
  /// </summary>
  /// <param name="definitions">Shader definitions, forwarded to the attribute usage tracking</param>
  /// <param name="gpuAccessor">Accessor used to fetch the shader code</param>
  /// <param name="context">Decoding context that receives the used features</param>
  /// <param name="block">Block to fill; its end address is set on return</param>
  /// <param name="limitAddress">Address where decoding must stop</param>
  /// <param name="startAddress">Base address added to the instruction address when fetching code</param>
  private static void FillBlock(
      ShaderDefinitions definitions,
      IGpuAccessor gpuAccessor,
      Context context,
      Block block,
      ulong limitAddress,
      ulong startAddress)
  {
      ulong address = block.Address;
      int codeIndex = 0;
      ReadOnlySpan<ulong> code = ReadOnlySpan<ulong>.Empty;
      InstOp op = default;

      while (true)
      {
          // Stop if a whole 8-byte instruction no longer fits before the limit.
          if (address + 7 >= limitAddress)
          {
              break;
          }

          if ((address & 0x1f) == 0)
          {
              // Scheduling instructions are written every 32 bytes, they are skipped.
              address += 8;
              codeIndex++;
          }
          else
          {
              if (codeIndex >= code.Length)
              {
                  code = gpuAccessor.GetCode(startAddress + address, 8);
                  codeIndex = 0;
              }

              ulong opCode = code[codeIndex++];

              op = InstTable.GetOp(address, opCode);

              if (op.Props.HasFlag(InstProps.TexB))
              {
                  context.SetUsedFeature(FeatureFlags.Bindless);
              }

              switch (op.Name)
              {
                  case InstName.Ald:
                  case InstName.Ast:
                  case InstName.Ipa:
                      SetUserAttributeUses(definitions, context, op.Name, opCode);
                      break;

                  case InstName.Pbk:
                  case InstName.Pcnt:
                  case InstName.Ssy:
                      block.AddPushOp(op);
                      break;

                  case InstName.Shfl:
                      context.SetUsedFeature(FeatureFlags.Shuffle);
                      break;

                  case InstName.Ldl:
                  case InstName.Stl:
                      context.SetUsedFeature(FeatureFlags.LocalMemory);
                      break;

                  case InstName.Atoms:
                  case InstName.AtomsCas:
                  case InstName.Lds:
                  case InstName.Sts:
                      context.SetUsedFeature(FeatureFlags.SharedMemory);
                      break;

                  case InstName.Atom:
                  case InstName.AtomCas:
                  case InstName.Red:
                  case InstName.Stg:
                  case InstName.Suatom:
                  case InstName.SuatomB:
                  case InstName.SuatomB2:
                  case InstName.SuatomCas:
                  case InstName.SuatomCasB:
                  case InstName.Sured:
                  case InstName.SuredB:
                  case InstName.Sust:
                  case InstName.SustB:
                  case InstName.SustD:
                  case InstName.SustDB:
                      context.SetUsedFeature(FeatureFlags.Store);
                      break;
              }

              block.OpCodes.Add(op);
              address += 8;
          }

          // Checked after skipped scheduling slots too, using the most recently decoded instruction.
          if (op.Props.HasFlag(InstProps.Bra))
          {
              break;
          }
      }

      block.EndAddress = address;
  }
  /// <summary>
  /// Records on the context (and definitions) which attributes and attribute related features
  /// are used by an AST, ALD or IPA instruction.
  /// </summary>
  /// <param name="definitions">Shader definitions; supplies the stage and receives indexing enables</param>
  /// <param name="context">Decoding context that receives the attribute usage and used features</param>
  /// <param name="name">Instruction name; anything other than AST or ALD is decoded as IPA</param>
  /// <param name="opCode">Raw instruction encoding</param>
  private static void SetUserAttributeUses(ShaderDefinitions definitions, Context context, InstName name, ulong opCode)
  {
      int offset;
      int count = 1;
      bool isStore = false;
      bool indexed;
      bool perPatch = false;

      switch (name)
      {
          case InstName.Ast:
          {
              InstAst opAst = new(opCode);

              count = (int)opAst.AlSize + 1;
              offset = opAst.Imm11;
              indexed = opAst.Phys;
              perPatch = opAst.P;
              isStore = true;
              break;
          }
          case InstName.Ald:
          {
              InstAld opAld = new(opCode);

              count = (int)opAld.AlSize + 1;
              offset = opAld.Imm11;
              indexed = opAld.Phys;
              perPatch = opAld.P;
              isStore = opAld.O;
              break;
          }
          default: // InstName.Ipa
          {
              InstIpa opIpa = new(opCode);

              offset = opIpa.Imm10;
              indexed = opIpa.Idx;
              break;
          }
      }

      if (indexed)
      {
          // With indexed access the exact attribute is unknown here, so all of them are marked.
          if (isStore)
          {
              context.AttributeUsage.SetAllOutputUserAttributes();
              definitions.EnableOutputIndexing();
          }
          else
          {
              context.AttributeUsage.SetAllInputUserAttributes();
              definitions.EnableInputIndexing();
          }

          return;
      }

      // Each accessed element is 4 bytes past the previous one.
      for (int elemIndex = 0, attr = offset; elemIndex < count; elemIndex++, attr += 4)
      {
          if (perPatch)
          {
              if (attr >= AttributeConsts.UserAttributePerPatchBase && attr < AttributeConsts.UserAttributePerPatchEnd)
              {
                  int userAttr = attr - AttributeConsts.UserAttributePerPatchBase;
                  int index = userAttr / 16;

                  if (isStore)
                  {
                      context.AttributeUsage.SetOutputUserAttributePerPatch(index);
                  }
                  else
                  {
                      context.AttributeUsage.SetInputUserAttributePerPatch(index);
                  }
              }
          }
          else if (attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd)
          {
              int userAttr = attr - AttributeConsts.UserAttributeBase;
              int index = userAttr / 16;

              if (isStore)
              {
                  context.AttributeUsage.SetOutputUserAttribute(index);
              }
              else
              {
                  context.AttributeUsage.SetInputUserAttribute(index, (userAttr >> 2) & 3);
              }
          }

          if (isStore)
          {
              TrackStoredBuiltIn(attr);
          }
          else if (attr == AttributeConsts.FogCoord ||
                   (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0) ||
                   (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd))
          {
              context.SetUsedFeature(FeatureFlags.FixedFuncAttr);
          }
          else
          {
              TrackLoadedBuiltIn(attr);
          }
      }

      // Flags features that depend on a built-in attribute being written, for the stages where it applies.
      void TrackStoredBuiltIn(int attr)
      {
          switch (attr)
          {
              case AttributeConsts.Layer:
                  if (definitions.Stage != ShaderStage.Compute && definitions.Stage != ShaderStage.Fragment)
                  {
                      context.SetUsedFeature(FeatureFlags.RtLayer);
                  }
                  break;

              case AttributeConsts.ViewportIndex:
                  if (definitions.Stage != ShaderStage.Fragment)
                  {
                      context.SetUsedFeature(FeatureFlags.ViewportIndex);
                  }
                  break;

              case AttributeConsts.ClipDistance0:
              case AttributeConsts.ClipDistance1:
              case AttributeConsts.ClipDistance2:
              case AttributeConsts.ClipDistance3:
              case AttributeConsts.ClipDistance4:
              case AttributeConsts.ClipDistance5:
              case AttributeConsts.ClipDistance6:
              case AttributeConsts.ClipDistance7:
                  if (definitions.Stage.IsVtg())
                  {
                      context.SetClipDistanceWritten((attr - AttributeConsts.ClipDistance0) / 4);
                  }
                  break;

              case AttributeConsts.ViewportMask:
                  if (definitions.Stage != ShaderStage.Fragment)
                  {
                      context.SetUsedFeature(FeatureFlags.ViewportMask);
                  }
                  break;
          }
      }

      // Flags features that depend on a built-in attribute being read, for the stages where it applies.
      void TrackLoadedBuiltIn(int attr)
      {
          switch (attr)
          {
              case AttributeConsts.PositionX:
              case AttributeConsts.PositionY:
                  if (definitions.Stage == ShaderStage.Fragment)
                  {
                      context.SetUsedFeature(FeatureFlags.FragCoordXY);
                  }
                  break;

              case AttributeConsts.InstanceId:
                  if (definitions.Stage == ShaderStage.Vertex)
                  {
                      context.SetUsedFeature(FeatureFlags.InstanceId);
                  }
                  break;
          }
      }
  }
  /// <summary>
  /// Checks if the instruction is unconditional and has the branch property.
  /// </summary>
  /// <param name="op">Instruction to check</param>
  /// <returns>True if the instruction is an unconditional branch, false otherwise</returns>
  public static bool IsUnconditionalBranch(ref InstOp op)
  {
      if (!IsUnconditional(ref op))
      {
          return false;
      }

      return op.Props.HasFlag(InstProps.Bra);
  }
  /// <summary>
  /// Checks if the instruction always executes: its predicate must be the non-inverted
  /// "true" predicate and, for BRA and EXIT, its condition code must be <see cref="Ccc.T"/>.
  /// </summary>
  /// <param name="op">Instruction to check</param>
  /// <returns>True if the instruction is unconditional, false otherwise</returns>
  private static bool IsUnconditional(ref InstOp op)
  {
      InstConditional condOp = new(op.RawOpCode);

      // The condition code is only checked for BRA and EXIT.
      bool checksConditionCode = op.Name is InstName.Bra or InstName.Exit;

      return (!checksConditionCode || condOp.Ccc == Ccc.T) &&
             condOp.Pred == RegisterConsts.PredicateTrueIndex &&
             !condOp.PredInv;
  }
  /// <summary>
  /// Tries to find the targets of BRX (indirect branch) instructions, and links each
  /// target block that is found to the block that ends with the BRX.
  /// </summary>
  /// <remarks>
  /// Only blocks ending in BRX whose successors are still limited to the fall-through block
  /// (or none at all) are processed.
  /// </remarks>
  /// <param name="context">Decoding context, used to read the jump offsets from constant buffer 1</param>
  /// <param name="blocks">Blocks to be scanned for BRX instructions</param>
  /// <param name="getBlock">Function that returns the block at a given address, creating it if needed</param>
  /// <returns>True if at least one BRX had its targets added on this call, false otherwise</returns>
  private static bool FindBrxTargets(Context context, IEnumerable<Block> blocks, Func<ulong, Block> getBlock)
  {
      bool hasNewTarget = false;

      foreach (Block block in blocks)
      {
          InstOp lastOp = block.GetLastOp();
          bool hasNext = block.HasNext();

          if (lastOp.Name != InstName.Brx || block.Successors.Count != (hasNext ? 1 : 0))
          {
              continue;
          }

          InstBrx opBrx = new(lastOp.RawOpCode);
          ulong baseOffset = lastOp.GetAbsoluteAddress();

          // An indirect branch could go anywhere,
          // try to get the possible target offsets from the constant buffer.
          (int cbBaseOffset, int cbOffsetsCount) = FindBrxTargetRange(block, opBrx.SrcA);

          // The count is a signed value. A zero or negative count adds no successors, so it must
          // not request another decoding round: the block would be matched again, unchanged,
          // and the caller would keep looping for as long as new targets are reported.
          if (cbOffsetsCount <= 0)
          {
              continue;
          }

          hasNewTarget = true;

          // The same address may show up several times on the table, link each target only once.
          HashSet<ulong> visited = new();

          for (int i = 0; i < cbOffsetsCount; i++)
          {
              uint targetOffset = context.ConstantBuffer1Read(cbBaseOffset + i * 4);
              ulong targetAddress = baseOffset + targetOffset;

              if (visited.Add(targetAddress))
              {
                  Block target = getBlock(targetAddress);

                  target.Predecessors.Add(block);
                  block.Successors.Add(target);
              }
          }
      }

      return hasNewTarget;
  }
  /// <summary>
  /// Tries to find the constant buffer range that holds the jump offsets used by a BRX instruction.
  /// </summary>
  /// <param name="block">Block that ends with the BRX instruction</param>
  /// <param name="brxReg">Register used as source by the BRX instruction</param>
  /// <returns>
  /// On a match, the constant buffer offset of the LDC and the IMNMX immediate plus one;
  /// (0, 0) when the expected instruction pattern is not found
  /// </returns>
  private static (int, int) FindBrxTargetRange(Block block, int brxReg)
  {
      // The pattern below is what we try to match:
      //
      // IMNMX.U32 Rx, Rx, UpperBound, PT
      // SHL Rx, Rx, 0x2
      // LDC Rx, c[0x1][Rx+BaseOffset]
      //
      // Rx is an arbitrary register, while "UpperBound" and "BaseOffset" are constants.
      // It is assumed that the compiler generates the pattern before BRX, since the instruction
      // is usually used to implement jump tables for switch statement optimizations.
      // When the match succeeds, "BaseOffset" is the offset in bytes of the jump offsets
      // on the constant buffer, and "UpperBound" is the total number of offsets for the BRX, minus 1.
      // The search walks backwards: LDC first, then SHL, then IMNMX.

      (int, int) noMatch = (0, 0);

      HashSet<Block> visited = new();

      BlockLocation brxLocation = new(block, block.OpCodes.Count - 1);
      BlockLocation ldcLocation = FindFirstRegWrite(visited, brxLocation, brxReg);

      if (ldcLocation.Block == null || ldcLocation.Block.OpCodes[ldcLocation.Index].Name != InstName.Ldc)
      {
          return noMatch;
      }

      GetOp(ldcLocation, out InstLdc opLdc);

      if (opLdc.CbufSlot != 1 || opLdc.AddressMode != 0)
      {
          return noMatch;
      }

      BlockLocation shlLocation = FindFirstRegWrite(visited, ldcLocation, opLdc.SrcA);

      if (shlLocation.Block == null || !shlLocation.IsImmInst(InstName.Shl))
      {
          return noMatch;
      }

      GetOp(shlLocation, out InstShlI opShl);

      if (opShl.Imm20 != 2)
      {
          return noMatch;
      }

      BlockLocation imnmxLocation = FindFirstRegWrite(visited, shlLocation, opShl.SrcA);

      if (imnmxLocation.Block == null || !imnmxLocation.IsImmInst(InstName.Imnmx))
      {
          return noMatch;
      }

      GetOp(imnmxLocation, out InstImnmxI opImnmx);

      bool isUnconditionalUnsigned =
          !opImnmx.Signed &&
          opImnmx.SrcPred == RegisterConsts.PredicateTrueIndex &&
          !opImnmx.SrcPredInv;

      if (!isUnconditionalUnsigned)
      {
          return noMatch;
      }

      return (opLdc.CbufOffset, opImnmx.Imm20 + 1);
  }
  /// <summary>
  /// Reinterprets the raw encoding of the instruction at the given location as <typeparamref name="T"/>.
  /// </summary>
  /// <typeparam name="T">Unmanaged type that the 64-bit raw encoding is reinterpreted as</typeparam>
  /// <param name="location">Location of the instruction</param>
  /// <param name="op">Reinterpreted instruction encoding</param>
  private static void GetOp<T>(BlockLocation location, out T op) where T : unmanaged
  {
      InstOp instruction = location.Block.OpCodes[location.Index];
      ulong rawOp = instruction.RawOpCode;

      // No conversion happens here, the bits of the local copy are viewed through the requested type.
      op = Unsafe.As<ulong, T>(ref rawOp);
  }
  /// <summary>
  /// Position of an instruction, given as a block plus an index on its instruction list.
  /// </summary>
  private readonly struct BlockLocation
  {
      public Block Block { get; }

      public int Index { get; }

      /// <summary>
      /// Creates a new location.
      /// </summary>
      /// <param name="block">Block that the location refers to</param>
      /// <param name="index">Index on the block instruction list</param>
      public BlockLocation(Block block, int index)
      {
          Block = block;
          Index = index;
      }

      /// <summary>
      /// Checks if the instruction at this location has the given name and the <see cref="InstProps.Ib"/> property.
      /// </summary>
      /// <param name="name">Expected instruction name</param>
      /// <returns>True if both the name and the property match, false otherwise</returns>
      public bool IsImmInst(InstName name)
      {
          InstOp candidate = Block.OpCodes[Index];

          if (candidate.Name != name)
          {
              return false;
          }

          return candidate.Props.HasFlag(InstProps.Ib);
      }
  }
  /// <summary>
  /// Searches backwards from a location for an instruction that writes to the given register,
  /// moving on to predecessor blocks (in breadth-first order) when the current block has none.
  /// </summary>
  /// <param name="visited">Blocks already visited; the blocks reached by this search are added to it</param>
  /// <param name="location">Location to search from; only instructions before its index are checked</param>
  /// <param name="regIndex">Index of the register to look for</param>
  /// <returns>Location of the write that was found, or a location with a null block if there is none</returns>
  private static BlockLocation FindFirstRegWrite(HashSet<Block> visited, BlockLocation location, int regIndex)
  {
      Queue<BlockLocation> pending = new();

      pending.Enqueue(location);
      visited.Add(location.Block);

      while (pending.Count != 0)
      {
          BlockLocation scanStart = pending.Dequeue();
          Block block = scanStart.Block;
          int opIndex = scanStart.Index;

          while (--opIndex >= 0)
          {
              if (WritesToRegister(block.OpCodes[opIndex], regIndex))
              {
                  return new BlockLocation(block, opIndex);
              }
          }

          foreach (Block predecessor in block.Predecessors)
          {
              if (!visited.Add(predecessor))
              {
                  continue;
              }

              // Starting at the instruction count makes the scan cover the whole predecessor.
              pending.Enqueue(new BlockLocation(predecessor, predecessor.OpCodes.Count));
          }
      }

      return new BlockLocation(null, 0);
  }
  /// <summary>
  /// Checks if the instruction writes to the given register.
  /// </summary>
  /// <param name="op">Instruction to check</param>
  /// <param name="regIndex">Index of the register</param>
  /// <returns>True if one of the destination fields of the instruction matches the register, false otherwise</returns>
  private static bool WritesToRegister(InstOp op, int regIndex)
  {
      // Instructions without a register destination (like the predicate ones,
      // that only ever write to predicates) must not have the fields checked.
      bool hasRegisterDest = (op.Props & (InstProps.Rd | InstProps.Rd2)) != 0;

      if (!hasRegisterDest)
      {
          return false;
      }

      // The fields are the low byte of the encoding, and the byte starting at bit 28.
      byte dest = (byte)op.RawOpCode;
      byte dest2 = (byte)(op.RawOpCode >> 28);

      bool matchesDest2 = op.Props.HasFlag(InstProps.Rd2) && dest2 == regIndex;

      return matchesDest2 || dest == regIndex;
  }
  /// <summary>
  /// Kind of an entry on the branch stack, used to match push instructions with pop instructions.
  /// </summary>
  private enum MergeType
  {
      /// <summary>Entry pushed by PBK, and consumed by BRK.</summary>
      Brk,

      /// <summary>Entry pushed by PCNT, and consumed by CONT.</summary>
      Cont,

      /// <summary>Entry pushed or consumed by any other instruction (SSY and SYNC).</summary>
      Sync,
  }
  /// <summary>
  /// Work item of the push operation propagation. It is either a block to visit,
  /// or a restore operation (with a null block) that undoes changes made to the branch stack.
  /// </summary>
  private readonly struct PathBlockState
  {
      private enum RestoreType
      {
          None,
          PopPushOp,
          PushBranchOp,
      }

      private readonly RestoreType _restoreType;
      private readonly ulong _restoreValue;
      private readonly MergeType _restoreMergeType;

      public Block Block { get; }

      public bool ReturningFromVisit => _restoreType != RestoreType.None;

      /// <summary>
      /// Creates a work item that visits a block.
      /// </summary>
      /// <param name="block">Block to be visited</param>
      public PathBlockState(Block block)
      {
          _restoreType = RestoreType.None;
          _restoreValue = 0;
          _restoreMergeType = default;
          Block = block;
      }

      /// <summary>
      /// Creates a restore operation that pops entries until the branch stack is back at a given size.
      /// </summary>
      /// <param name="oldStackSize">Size that the branch stack should be shrunk to</param>
      public PathBlockState(int oldStackSize)
      {
          _restoreType = RestoreType.PopPushOp;
          _restoreValue = (ulong)oldStackSize;
          _restoreMergeType = default;
          Block = null;
      }

      /// <summary>
      /// Creates a restore operation that pushes an entry back into the branch stack.
      /// </summary>
      /// <param name="syncAddress">Address of the entry to be pushed</param>
      /// <param name="mergeType">Merge type of the entry to be pushed</param>
      public PathBlockState(ulong syncAddress, MergeType mergeType)
      {
          _restoreType = RestoreType.PushBranchOp;
          _restoreValue = syncAddress;
          _restoreMergeType = mergeType;
          Block = null;
      }

      /// <summary>
      /// Applies the restore operation to the branch stack. Does nothing for block work items.
      /// </summary>
      /// <param name="branchStack">Branch stack to be modified</param>
      public void RestoreStackState(Stack<(ulong, MergeType)> branchStack)
      {
          switch (_restoreType)
          {
              case RestoreType.PushBranchOp:
                  branchStack.Push((_restoreValue, _restoreMergeType));
                  break;

              case RestoreType.PopPushOp:
                  uint oldStackSize = (uint)_restoreValue;

                  while (branchStack.Count > oldStackSize)
                  {
                      branchStack.Pop();
                  }
                  break;
          }
      }
  }
  /// <summary>
  /// Finds the pop instructions (SYNC/BRK/CONT) that consume the address pushed by a push instruction
  /// (SSY/PBK/PCNT), and links the blocks ending in those instructions to the push target block.
  /// </summary>
  /// <remarks>
  /// The blocks reachable from the push instruction are walked depth-first while the branch stack is
  /// simulated. A pop instruction that leaves the simulated stack empty is taken as a consumer of the push.
  /// </remarks>
  /// <param name="blocks">All the blocks of the function, by address</param>
  /// <param name="currBlock">Block that contains the push instruction</param>
  /// <param name="pushOpIndex">Index of the push instruction on the block push operations list</param>
  private static void PropagatePushOp(Dictionary<ulong, Block> blocks, Block currBlock, int pushOpIndex)
  {
      PushOpInfo pushOpInfo = currBlock.PushOpCodes[pushOpIndex];
      InstOp pushOp = pushOpInfo.Op;
      Block target = blocks[pushOp.GetAbsoluteAddress()];

      Stack<PathBlockState> workQueue = new();
      HashSet<Block> visited = new();
      Stack<(ulong, MergeType)> branchStack = new();

      // Queues a block for visiting. Blocks that were already visited are ignored,
      // otherwise we would go around in circles and get stuck here.
      void VisitLater(Block block)
      {
          if (!visited.Contains(block))
          {
              workQueue.Push(new PathBlockState(block));
          }
      }

      // On the first block, only the pushes starting at the push being propagated count.
      int firstPushIndex = pushOpIndex;

      VisitLater(currBlock);

      while (workQueue.TryPop(out PathBlockState state))
      {
          // Restore operations (work items without a block) undo the work done inside a block
          // when we return from it: they pop the addresses pushed by SSY/PBK instructions
          // inside the block, and push back the addresses popped by SYNC/BRK.
          if (state.ReturningFromVisit)
          {
              state.RestoreStackState(branchStack);

              continue;
          }

          Block current = state.Block;

          // Processing a block twice would push the same child blocks again,
          // and we would go around in circles until memory is exhausted.
          if (!visited.Add(current))
          {
              continue;
          }

          int pushOpsCount = current.PushOpCodes.Count;

          if (pushOpsCount != 0)
          {
              // Restores the stack size when we return from this block.
              workQueue.Push(new PathBlockState(branchStack.Count));

              for (int index = firstPushIndex; index < pushOpsCount; index++)
              {
                  InstOp currentPushOp = current.PushOpCodes[index].Op;
                  MergeType pushMergeType = GetMergeTypeFromPush(currentPushOp.Name);

                  branchStack.Push((currentPushOp.GetAbsoluteAddress(), pushMergeType));
              }
          }

          firstPushIndex = 0;

          bool hasNext = current.HasNext();

          if (hasNext)
          {
              VisitLater(current.Successors[0]);
          }

          InstOp lastOp = current.GetLastOp();

          if (!IsPopBranch(lastOp.Name))
          {
              // The stack is a LIFO, so pushing the blocks in descending address order
              // makes them be processed in ascending address order.
              foreach (Block possibleTarget in current.Successors.OrderByDescending(x => x.Address))
              {
                  // The block at the next address was already pushed above.
                  bool isNextBlock = hasNext && possibleTarget == current.Successors[0];

                  if (!isNextBlock)
                  {
                      VisitLater(possibleTarget);
                  }
              }

              continue;
          }

          // The push and pop instruction types must match, so:
          // - BRK can only consume addresses pushed by PBK.
          // - CONT can only consume addresses pushed by PCNT.
          // - SYNC can only consume addresses pushed by SSY.
          // Entries are popped until one of the matching type shows up.
          MergeType popMergeType = GetMergeTypeFromPop(lastOp.Name);

          bool found = false;
          ulong targetAddress = 0UL;

          while (branchStack.Count != 0)
          {
              (ulong poppedAddress, MergeType poppedMergeType) = branchStack.Pop();

              targetAddress = poppedAddress;

              // Every popped entry goes back into the PBK/PCNT/SSY stack when we return from the block.
              workQueue.Push(new PathBlockState(poppedAddress, poppedMergeType));

              if (poppedMergeType == popMergeType)
              {
                  found = true;

                  break;
              }
          }

          if (!found)
          {
              continue;
          }

          if (branchStack.Count != 0)
          {
              // The address belongs to another push instruction, just keep going from its block.
              VisitLater(blocks[targetAddress]);
          }
          else if (current.SyncTargets.TryAdd(pushOp.Address, new SyncTarget(pushOpInfo, current.SyncTargets.Count)))
          {
              // The entire stack was consumed, which means that the current pop instruction
              // just consumed the address from our push instruction.
              pushOpInfo.Consumers.Add(current, Local());
              target.Predecessors.Add(current);
              current.Successors.Add(target);
          }
      }
  }
  /// <summary>
  /// Checks if the instruction name is BRK, CONT or SYNC.
  /// </summary>
  /// <param name="name">Instruction name to check</param>
  /// <returns>True for BRK, CONT and SYNC, false for any other instruction</returns>
  public static bool IsPopBranch(InstName name)
  {
      return name is InstName.Brk or InstName.Cont or InstName.Sync;
  }
  /// <summary>
  /// Gets the merge type of the branch stack entry created by a push instruction.
  /// </summary>
  /// <param name="name">Name of the push instruction</param>
  /// <returns>Brk for PBK, Cont for PCNT, and Sync for everything else</returns>
  private static MergeType GetMergeTypeFromPush(InstName name)
  {
      switch (name)
      {
          case InstName.Pbk:
              return MergeType.Brk;

          case InstName.Pcnt:
              return MergeType.Cont;

          default:
              return MergeType.Sync;
      }
  }
  /// <summary>
  /// Gets the merge type of the branch stack entry that a pop instruction is able to consume.
  /// </summary>
  /// <param name="name">Name of the pop instruction</param>
  /// <returns>Brk for BRK, Cont for CONT, and Sync for everything else</returns>
  private static MergeType GetMergeTypeFromPop(InstName name)
  {
      switch (name)
      {
          case InstName.Brk:
              return MergeType.Brk;

          case InstName.Cont:
              return MergeType.Cont;

          default:
              return MergeType.Sync;
      }
  }
  771. }
  772. }