// GlobalToStorage.cs (17 KB)
  1. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  2. using System.Collections.Generic;
  3. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  4. using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
  5. namespace Ryujinx.Graphics.Shader.Translation.Optimizations
  6. {
  7. static class GlobalToStorage
  8. {
  /// <summary>
  /// Result of tracing a global memory address back to a constant buffer location.
  /// </summary>
  /// <remarks>
  /// The value is immutable, so the struct is declared <c>readonly</c> to let the
  /// compiler skip defensive copies when members are read through read-only references.
  /// </remarks>
  private readonly struct SearchResult
  {
      /// <summary>Sentinel result whose slot is -1 and whose offset is 0.</summary>
      public static SearchResult NotFound => new SearchResult(-1, 0);

      /// <summary>True when <see cref="SbCbSlot"/> is anything other than -1.</summary>
      public bool Found => SbCbSlot != -1;

      /// <summary>Constant buffer slot of the match, or -1 for <see cref="NotFound"/>.</summary>
      public int SbCbSlot { get; }

      /// <summary>Offset of the match inside the constant buffer slot.</summary>
      public int SbCbOffset { get; }

      /// <summary>Creates a result for the given constant buffer slot and offset.</summary>
      /// <param name="sbCbSlot">Constant buffer slot; -1 marks the result as not found.</param>
      /// <param name="sbCbOffset">Offset inside the constant buffer slot.</param>
      public SearchResult(int sbCbSlot, int sbCbOffset)
      {
          SbCbSlot = sbCbSlot;
          SbCbOffset = sbCbOffset;
      }
  }
  /// <summary>
  /// Runs the global-to-storage pass over a single basic block.
  /// </summary>
  /// <remarks>
  /// Every source operand of every node is tested against the storage descriptor
  /// range (and, for compute shaders, the UBE descriptor range) so that the matching
  /// bit can be set in the use masks. Global memory operations whose base address can
  /// be traced back to a constant buffer are then replaced in place. The final masks
  /// are handed to <c>config.SetAccessibleBufferMasks</c>.
  /// </remarks>
  /// <param name="block">Basic block whose operation list is scanned and rewritten.</param>
  /// <param name="config">Shader configuration queried for the stage and buffer slots.</param>
  /// <param name="sbUseMask">Accumulated bit mask of referenced storage descriptor indices.</param>
  /// <param name="ubeUseMask">Accumulated bit mask of referenced UBE descriptor indices.</param>
  public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask)
  {
      int sbStart = GetStorageBaseCbOffset(config.Stage);
      int sbEnd = sbStart + StorageDescsSize;

      int ubeStart = UbeBaseOffset;
      int ubeEnd = ubeStart + UbeDescsSize;

      for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
      {
          INode current = node.Value;

          // Record which descriptors are referenced directly by this node's sources.
          for (int srcIndex = 0; srcIndex < current.SourcesCount; srcIndex++)
          {
              Operand src = current.GetSource(srcIndex);

              int sbIndex = GetStorageIndex(src, sbStart, sbEnd);

              if (sbIndex >= 0)
              {
                  sbUseMask |= 1 << sbIndex;
              }

              if (config.Stage != ShaderStage.Compute)
              {
                  continue;
              }

              int ubeIndex = GetStorageIndex(src, ubeStart, ubeEnd);

              if (ubeIndex >= 0)
              {
                  ubeUseMask |= 1 << ubeIndex;
              }
          }

          if (current is Operation operation && UsesGlobalMemory(operation.Inst, operation.StorageKind))
          {
              SearchResult found = SearchForStorageBase(config, block, operation.GetSource(0));

              if (found.Found)
              {
                  bool isUbeLoad =
                      config.Stage == ShaderStage.Compute &&
                      operation.Inst == Instruction.LoadGlobal &&
                      found.SbCbSlot == DriverReservedCb &&
                      found.SbCbOffset >= ubeStart &&
                      found.SbCbOffset < ubeEnd;

                  if (isUbeLoad)
                  {
                      // Here we effectively try to replace a LDG instruction with LDC.
                      // The hardware only supports a limited amount of constant buffers
                      // so NVN "emulates" more constant buffers using global memory access.
                      // Here we try to replace the global access back to a constant buffer
                      // load.
                      int ubeIndex = (found.SbCbOffset - ubeStart) / StorageDescSize;

                      node = ReplaceLdgWithLdc(node, config, ubeIndex);
                  }
                  else
                  {
                      // Storage buffers are implemented using global memory access.
                      // If we know from where the base address of the access is loaded,
                      // we can guess which storage buffer it is accessing.
                      // We can then replace the global memory access with a storage
                      // buffer access.
                      int sbSlot = config.GetSbSlot((byte)found.SbCbSlot, (ushort)found.SbCbOffset);

                      node = ReplaceGlobalWithStorage(block, node, config, sbSlot);
                  }
              }
          }
      }

      config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
  }
  /// <summary>
  /// Replaces the global memory operation held by <paramref name="node"/> with the
  /// equivalent storage buffer operation and returns the node of the replacement.
  /// </summary>
  /// <param name="block">Basic block that contains the operation.</param>
  /// <param name="node">List node holding the global memory operation.</param>
  /// <param name="config">Shader configuration; notified of the storage buffer usage.</param>
  /// <param name="storageIndex">Storage buffer index that becomes the first source.</param>
  /// <returns>The list node that now holds the storage operation.</returns>
  private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
  {
      Operation globalOp = (Operation)node.Value;
      Instruction inst = globalOp.Inst;

      bool isAtomic = inst.IsAtomic();
      bool isNarrowStore = inst == Instruction.StoreGlobal16 || inst == Instruction.StoreGlobal8;
      bool isWrite = isAtomic || inst == Instruction.StoreGlobal || isNarrowStore;

      config.SetUsedStorageBuffer(storageIndex, isWrite);

      // Source 0 (the address) turns into the buffer index plus an offset into the
      // buffer; any remaining sources are carried over untouched.
      int sourceCount = globalOp.SourcesCount;
      Operand[] sources = new Operand[sourceCount];

      sources[0] = Const(storageIndex);
      sources[1] = GetStorageOffset(block, node, config, storageIndex, globalOp.GetSource(0), isNarrowStore);

      for (int i = 2; i < sourceCount; i++)
      {
          sources[i] = globalOp.GetSource(i);
      }

      Operation storageOp;

      if (isAtomic)
      {
          // Atomics keep their instruction and only switch storage kind.
          storageOp = new Operation(inst, StorageKind.StorageBuffer, globalOp.Dest, sources);
      }
      else if (inst == Instruction.LoadGlobal)
      {
          storageOp = new Operation(Instruction.LoadStorage, globalOp.Dest, sources);
      }
      else
      {
          Instruction storeInst;

          switch (inst)
          {
              case Instruction.StoreGlobal16:
                  storeInst = Instruction.StoreStorage16;
                  break;
              case Instruction.StoreGlobal8:
                  storeInst = Instruction.StoreStorage8;
                  break;
              default:
                  storeInst = Instruction.StoreStorage;
                  break;
          }

          storageOp = new Operation(storeInst, null, sources);
      }

      // Clear every source of the operation that is about to be removed.
      for (int i = 0; i < sourceCount; i++)
      {
          globalOp.SetSource(i, null);
      }

      LinkedList<INode> list = node.List;
      LinkedListNode<INode> replacement = list.AddBefore(node, storageOp);

      list.Remove(node);

      return replacement;
  }
  /// <summary>
  /// Emits, before <paramref name="node"/>, the operations that turn the low address
  /// word of a global access into an offset inside the given storage buffer.
  /// </summary>
  /// <param name="block">Basic block that contains the access.</param>
  /// <param name="node">Node of the access; helper operations are inserted before it.</param>
  /// <param name="config">Shader configuration and GPU accessor queries.</param>
  /// <param name="storageIndex">Storage buffer index whose base address is looked up.</param>
  /// <param name="addrLow">Low word of the global address.</param>
  /// <param name="isStg16Or8">True for 16/8-bit stores, which receive a byte offset.</param>
  /// <returns>A byte offset when <paramref name="isStg16Or8"/> is true, otherwise the byte offset shifted right by 2.</returns>
  private static Operand GetStorageOffset(
      BasicBlock block,
      LinkedListNode<INode> node,
      ShaderConfig config,
      int storageIndex,
      Operand addrLow,
      bool isStg16Or8)
  {
      (int sbCbSlot, int sbCbOffset) = config.GetSbCbInfo(storageIndex);

      bool storageAligned =
          !config.GpuAccessor.QueryHasUnalignedStorageBuffer() &&
          config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() <= Constants.StorageAlignment;

      Operand byteOffset = null;
      int constantOffset = 0;

      if (storageAligned)
      {
          // Try to recover the offset directly from the address computation.
          (byteOffset, constantOffset) = GetStorageOffset(block, Utils.FindLastOperation(addrLow, block), sbCbSlot, sbCbOffset);
      }

      LinkedList<INode> list = node.List;

      if (byteOffset == null)
      {
          // Fallback: offset = addrLow - (baseAddrLow & -alignment).
          Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset);
          Operand baseAddrTrunc = Local();
          Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());

          list.AddBefore(node, new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask));

          Operand difference = Local();

          list.AddBefore(node, new Operation(Instruction.Subtract, difference, addrLow, baseAddrTrunc));

          byteOffset = difference;
      }
      else
      {
          ReplaceAddressAlignment(list, addrLow, byteOffset, constantOffset);

          if (constantOffset != 0)
          {
              Operand sum = Local();

              list.AddBefore(node, new Operation(Instruction.Add, sum, byteOffset, Const(constantOffset)));

              byteOffset = sum;
          }
      }

      if (isStg16Or8)
      {
          return byteOffset;
      }

      Operand wordOffset = Local();

      list.AddBefore(node, new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)));

      return wordOffset;
  }
  /// <summary>
  /// Tells whether <paramref name="operand"/> is a constant buffer operand that refers
  /// to exactly the given slot and offset.
  /// </summary>
  private static bool IsCbOffset(Operand operand, int slot, int offset)
  {
      if (operand.Type != OperandType.ConstantBuffer)
      {
          return false;
      }

      if (operand.GetCbufSlot() != slot)
      {
          return false;
      }

      return operand.GetCbufOffset() == offset;
  }
  /// <summary>
  /// Rewrites every <c>address &amp; 3</c> operation found in <paramref name="list"/> so
  /// that it is computed from <c>byteOffset + constantOffset</c> instead of the address.
  /// </summary>
  /// <param name="list">Operation list of the block being rewritten; uses outside it are left alone.</param>
  /// <param name="address">Operand whose uses are inspected.</param>
  /// <param name="byteOffset">Offset operand that replaces the address.</param>
  /// <param name="constantOffset">Constant added to <paramref name="byteOffset"/>.</param>
  private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset)
  {
      // When we emit 16/8-bit LDG, we add extra code to determine the address alignment.
      // Eliminate the storage buffer base address from this too, leaving only the byte offset.

      // Iterate over a snapshot: SetSource below re-points a use away from `address`,
      // which presumably updates address.UseOps (confirm in Operation.SetSource).
      // Mutating a collection while it is being enumerated is not guaranteed to be safe.
      List<INode> uses = new List<INode>(address.UseOps);

      foreach (INode useNode in uses)
      {
          if (!(useNode is Operation op) || op.Inst != Instruction.BitwiseAnd)
          {
              continue;
          }

          Operand src1 = op.GetSource(0);
          Operand src2 = op.GetSource(1);

          int addressIndex = -1;

          if (src1 == address && src2.Type == OperandType.Constant && src2.Value == 3)
          {
              addressIndex = 0;
          }
          else if (src2 == address && src1.Type == OperandType.Constant && src1.Value == 3)
          {
              addressIndex = 1;
          }

          if (addressIndex == -1)
          {
              continue;
          }

          LinkedListNode<INode> node = list.Find(op);

          // Add offset calculation before the use. Needs to be on the same block.
          if (node != null)
          {
              Operand offset = Local();
              Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));

              list.AddBefore(node, addOp);

              op.SetSource(addressIndex, offset);
          }
      }
  }
  /// <summary>
  /// Tries to split <paramref name="address"/> into "base address + variable offset +
  /// constant offset", where the base address is the constant buffer word at
  /// (<paramref name="cbSlot"/>, <paramref name="baseAddressCbOffset"/>).
  /// </summary>
  /// <returns>
  /// The variable offset operand and the constant offset, or <c>(null, 0)</c> when the
  /// address does not match any of the recognised shapes.
  /// </returns>
  private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int cbSlot, int baseAddressCbOffset)
  {
      // The address is the base itself: zero offset.
      if (IsCbOffset(address, cbSlot, baseAddressCbOffset))
      {
          return (Const(0), 0);
      }

      (Operand stripped, int constantOffset) = GetStorageConstantOffset(block, address);

      Operand candidate = Utils.FindLastOperation(stripped, block);

      // base + constant: only the constant part remains.
      if (IsCbOffset(candidate, cbSlot, baseAddressCbOffset))
      {
          return (Const(0), constantOffset);
      }

      Operation offsetAdd = candidate.AsgOp as Operation;

      if (offsetAdd == null || offsetAdd.Inst != Instruction.Add)
      {
          return (null, 0);
      }

      Operand lhs = offsetAdd.GetSource(0);
      Operand rhs = Utils.FindLastOperation(offsetAdd.GetSource(1), block);

      // Whichever side is the base address, the other side is the variable offset.
      if (IsCbOffset(rhs, cbSlot, baseAddressCbOffset))
      {
          return (lhs, constantOffset);
      }

      if (IsCbOffset(lhs, cbSlot, baseAddressCbOffset))
      {
          return (rhs, constantOffset);
      }

      return (null, 0);
  }
  /// <summary>
  /// Peels a trailing constant addend off <paramref name="address"/>.
  /// </summary>
  /// <returns>
  /// The first source of the add and the constant value when the address is assigned
  /// by an <c>Add</c> whose second source is a constant; otherwise the address itself
  /// and 0.
  /// </returns>
  private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address)
  {
      if (address.AsgOp is Operation add && add.Inst == Instruction.Add)
      {
          Operand variablePart = add.GetSource(0);
          Operand constantPart = add.GetSource(1);

          if (constantPart.Type == OperandType.Constant)
          {
              return (variablePart, constantPart.Value);
          }
      }

      return (address, 0);
  }
  /// <summary>
  /// Replaces the global load held by <paramref name="node"/> with a constant buffer
  /// load and returns the node of the replacement.
  /// </summary>
  /// <param name="node">List node holding the global load.</param>
  /// <param name="config">Shader configuration used for alignment and binding queries.</param>
  /// <param name="storageIndex">Index of the UBE descriptor the address was traced to.</param>
  /// <returns>The list node that now holds the constant buffer load.</returns>
  private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
  {
      Operation operation = (Operation)node.Value;

      // Emits wordOffset = (addrLow - (baseAddrLow & -alignment)) >> 2 before the load.
      Operand GetCbufOffset()
      {
          Operand addrLow = operation.GetSource(0);

          // RunPass only routes a load here when its base address was found in
          // DriverReservedCb, so that same slot (rather than a literal 0) is used to
          // rebuild the base address operand.
          Operand baseAddrLow = Cbuf(DriverReservedCb, UbeBaseOffset + storageIndex * StorageDescSize);

          Operand baseAddrTrunc = Local();

          Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());

          Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);

          node.List.AddBefore(node, andOp);

          Operand byteOffset = Local();
          Operand wordOffset = Local();

          Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
          Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));

          node.List.AddBefore(node, subOp);
          node.List.AddBefore(node, shrOp);

          return wordOffset;
      }

      Operand cbufOffset = GetCbufOffset();

      // Split the word offset into vecIndex = offset >> 2 and elemIndex = offset & 3.
      Operand vecIndex = Local();
      Operand elemIndex = Local();

      node.List.AddBefore(node, new Operation(Instruction.ShiftRightU32, 0, vecIndex, cbufOffset, Const(2)));
      node.List.AddBefore(node, new Operation(Instruction.BitwiseAnd, 0, elemIndex, cbufOffset, Const(3)));

      Operand[] sources = new Operand[4];

      int cbSlot = UbeFirstCbuf + storageIndex;

      sources[0] = Const(config.ResourceManager.GetConstantBufferBinding(cbSlot));
      sources[1] = Const(0); // NOTE(review): meaning of this constant is not visible here; confirm against the Load/ConstantBuffer operand layout.
      sources[2] = vecIndex;
      sources[3] = elemIndex;

      Operation ldcOp = new Operation(Instruction.Load, StorageKind.ConstantBuffer, operation.Dest, sources);

      // Clear every source of the operation that is about to be removed.
      for (int index = 0; index < operation.SourcesCount; index++)
      {
          operation.SetSource(index, null);
      }

      LinkedListNode<INode> oldNode = node;

      node = node.List.AddBefore(node, ldcOp);

      node.List.Remove(oldNode);

      return node;
  }
  /// <summary>
  /// Walks back from a global address operand to find the constant buffer word that
  /// its base address was loaded from.
  /// </summary>
  /// <param name="config">Shader configuration, forwarded to the lookup helper.</param>
  /// <param name="block">Basic block used when resolving the last assignment of an operand.</param>
  /// <param name="globalAddress">Address operand of the global memory operation.</param>
  /// <returns>The matching constant buffer location, or <c>SearchResult.NotFound</c>.</returns>
  private static SearchResult SearchForStorageBase(ShaderConfig config, BasicBlock block, Operand globalAddress)
  {
      globalAddress = Utils.FindLastOperation(globalAddress, block);

      if (globalAddress.Type == OperandType.ConstantBuffer)
      {
          return GetStorageIndex(config, globalAddress);
      }

      if (!(globalAddress.AsgOp is Operation addOp) || addOp.Inst != Instruction.Add)
      {
          return SearchResult.NotFound;
      }

      Operand lhs = addOp.GetSource(0);
      Operand rhs = addOp.GetSource(1);

      bool lhsIsLocal = lhs.Type == OperandType.LocalVariable;
      bool rhsIsLocal = rhs.Type == OperandType.LocalVariable;

      bool isLocalPlusConstant =
          (lhsIsLocal && rhs.Type == OperandType.Constant) ||
          (rhsIsLocal && lhs.Type == OperandType.Constant);

      if (isLocalPlusConstant)
      {
          // Skip over the "+ constant" and look at what produced the local.
          Operand baseAddr = Utils.FindLastOperation(lhsIsLocal ? lhs : rhs, block);

          SearchResult direct = GetStorageIndex(config, baseAddr);

          if (direct.Found)
          {
              return direct;
          }

          addOp = baseAddr.AsgOp as Operation;

          if (addOp == null || addOp.Inst != Instruction.Add)
          {
              return SearchResult.NotFound;
          }
      }

      SearchResult best = SearchResult.NotFound;

      for (int i = 0; i < addOp.SourcesCount; i++)
      {
          SearchResult candidate = GetStorageIndex(config, addOp.GetSource(i));

          if (!candidate.Found)
          {
              continue;
          }

          // If we already have a result, we give preference to the ones from
          // the driver reserved constant buffer, as those are the ones that
          // contains the base address.
          if (!best.Found || candidate.SbCbSlot == GlobalMemory.DriverReservedCb)
          {
              best = candidate;
          }
      }

      return best;
  }
  /// <summary>
  /// Converts a constant buffer operand into a <c>SearchResult</c>.
  /// </summary>
  /// <returns>
  /// The operand's slot and offset when it is a constant buffer operand whose offset
  /// has its two low bits clear; otherwise <c>SearchResult.NotFound</c>.
  /// </returns>
  private static SearchResult GetStorageIndex(ShaderConfig config, Operand operand)
  {
      if (operand.Type != OperandType.ConstantBuffer)
      {
          return SearchResult.NotFound;
      }

      int slot = operand.GetCbufSlot();
      int offset = operand.GetCbufOffset();

      return (offset & 3) == 0
          ? new SearchResult(slot, offset)
          : SearchResult.NotFound;
  }
  /// <summary>
  /// Maps a constant buffer operand to a descriptor index inside the half-open offset
  /// range [<paramref name="sbStart"/>, <paramref name="sbEnd"/>) of slot 0.
  /// </summary>
  /// <returns>
  /// <c>(offset - sbStart) / StorageDescSize</c> for a slot 0 operand inside the range,
  /// otherwise -1.
  /// </returns>
  private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd)
  {
      if (operand.Type != OperandType.ConstantBuffer)
      {
          return -1;
      }

      int slot = operand.GetCbufSlot();
      int offset = operand.GetCbufOffset();

      bool inRange = slot == 0 && offset >= sbStart && offset < sbEnd;

      return inRange ? (offset - sbStart) / StorageDescSize : -1;
  }
  372. }
  373. }