Rewriter.cs 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631
  1. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Linq;
  5. using System.Numerics;
  6. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  7. using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
  8. namespace Ryujinx.Graphics.Shader.Translation
  9. {
  10. static class Rewriter
  11. {
  /// <summary>
  /// Walks every operation of every basic block and applies the rewrites implemented by this class:
  /// draw parameter handling (vertex shaders only), texture sample rewriting, SNORM normalization
  /// of buffer texture samples, and global memory access rewriting.
  /// </summary>
  /// <param name="blocks">Basic blocks whose operation lists are modified in place</param>
  /// <param name="config">Shader configuration, used for host queries and to record used features</param>
  public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
  {
      bool isVertexShader = config.Stage == ShaderStage.Vertex;
      bool cbufDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters();
      bool hostSupportsSnormBuffers = config.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat();

      foreach (BasicBlock block in blocks)
      {
          LinkedListNode<INode> current = block.Operations.First;

          // The list is edited while it is walked, so the nodes are followed manually.
          while (current != null)
          {
              if (current.Value is not Operation operation)
              {
                  current = current.Next;
                  continue;
              }

              if (isVertexShader)
              {
                  // Either replace the constant buffer reads, or just detect them.
                  bool usesDrawParameters = cbufDrawParameters
                      ? ReplaceConstantBufferWithDrawParameters(operation)
                      : HasConstantBufferDrawParameters(operation);

                  if (usesDrawParameters)
                  {
                      config.SetUsedFeature(FeatureFlags.DrawParameters);
                  }
              }

              // Taken before rewriting, since a rewrite can remove the current node from the list.
              LinkedListNode<INode> following = current.Next;

              if (operation is TextureOperation texOp)
              {
                  if (texOp.Inst == Instruction.TextureSample)
                  {
                      current = RewriteTextureSample(current, config);

                      if (texOp.Type == SamplerType.TextureBuffer && !hostSupportsSnormBuffers)
                      {
                          current = InsertSnormNormalization(current, config);
                      }
                  }

                  // Continue after whatever node the texture rewrites left us on.
                  following = current.Next;
              }
              else if (UsesGlobalMemory(operation.Inst))
              {
                  LinkedListNode<INode> rewritten = RewriteGlobalAccess(current, config);

                  following = rewritten?.Next ?? following;
              }

              current = following;
          }
      }
  }
  /// <summary>
  /// Replaces a global memory operation with the equivalent storage buffer operation.
  /// </summary>
  /// <remarks>
  /// For each slot set in <c>config.AccessibleStorageBuffersMask</c>, operations are inserted before
  /// <paramref name="node"/> that test the accessed address against the base address and size read from
  /// constant buffer 0, and select the slot and base address of the buffer whose test passed.
  /// The original operation is then replaced by a storage operation that uses the selected slot and the
  /// offset of the address inside it. When no storage buffer is accessible, an operation that has a
  /// destination is replaced with a copy of constant 0 to that destination, and an operation without
  /// a destination is removed with no replacement.
  /// </remarks>
  /// <param name="node">List node that holds the global memory operation</param>
  /// <param name="config">Shader configuration, used for host queries and to record used storage buffers</param>
  /// <returns>Node of the replacement operation, or null if the operation was removed without a replacement</returns>
  private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
  {
      Operation operation = (Operation)node.Value;

      bool isAtomic = operation.Inst.IsAtomic();
      bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
      bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;

      Operation storageOp = null;

      // Inserts a new operation before the one being rewritten, and returns its destination.
      Operand PrependOperation(Instruction inst, params Operand[] sources)
      {
          Operand local = Local();

          node.List.AddBefore(node, new Operation(inst, local, sources));

          return local;
      }

      Operand addrLow = operation.GetSource(0);
      Operand addrHigh = operation.GetSource(1);

      Operand sbBaseAddrLow = Const(0);
      Operand sbSlot = Const(0);

      int sbUseMask = config.AccessibleStorageBuffersMask;

      // The loop below clears the mask bit by bit, so whether there was anything to search
      // must be recorded here. Testing the mask after the loop would always yield false.
      bool hasAccessibleStorageBuffers = sbUseMask != 0;

      while (sbUseMask != 0)
      {
          int slot = BitOperations.TrailingZeroCount(sbUseMask);

          sbUseMask &= ~(1 << slot);

          config.SetUsedStorageBuffer(slot, isWrite);

          int cbOffset = GetStorageCbOffset(config.Stage, slot);

          Operand baseAddrLow = Cbuf(0, cbOffset);
          Operand baseAddrHigh = Cbuf(0, cbOffset + 1);
          Operand size = Cbuf(0, cbOffset + 2);

          // Low word test: unsigned (addrLow - baseAddrLow) < size.
          Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
          Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);

          Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size);

          // High word test: the result of the low word comparison is added to the high word
          // before it is compared with the high word of the base address.
          Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow);

          Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh);

          Operand inRange = PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh);

          // Keep the previous selection unless the address falls inside this buffer.
          sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow);
          sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
      }

      if (hasAccessibleStorageBuffers)
      {
          // NOTE(review): negating the alignment only gives a valid mask for power of two alignments - confirm.
          Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
          Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
          Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);

          // The two address sources are replaced by the slot and offset, the remaining ones are kept.
          Operand[] sources = new Operand[operation.SourcesCount];

          sources[0] = sbSlot;

          if (isStg16Or8)
          {
              sources[1] = byteOffset;
          }
          else
          {
              // Every other access gets the byte offset divided by 4.
              sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
          }

          for (int index = 2; index < operation.SourcesCount; index++)
          {
              sources[index] = operation.GetSource(index);
          }

          if (isAtomic)
          {
              Instruction inst = (operation.Inst & ~Instruction.MrMask) | Instruction.MrStorage;

              storageOp = new Operation(inst, operation.Dest, sources);
          }
          else if (operation.Inst == Instruction.LoadGlobal)
          {
              storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
          }
          else
          {
              Instruction storeInst = operation.Inst switch
              {
                  Instruction.StoreGlobal16 => Instruction.StoreStorage16,
                  Instruction.StoreGlobal8 => Instruction.StoreStorage8,
                  _ => Instruction.StoreStorage
              };

              storageOp = new Operation(storeInst, null, sources);
          }
      }
      else if (operation.Dest != null)
      {
          // Nothing can be accessed, the destination just receives 0.
          storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0));
      }

      // Clear the sources of the operation that is going away.
      for (int index = 0; index < operation.SourcesCount; index++)
      {
          operation.SetSource(index, null);
      }

      LinkedListNode<INode> oldNode = node;
      LinkedList<INode> oldNodeList = oldNode.List;

      if (storageOp != null)
      {
          node = node.List.AddBefore(node, storageOp);
      }
      else
      {
          node = null;
      }

      oldNodeList.Remove(oldNode);

      return node;
  }
  /// <summary>
  /// Rewrites a texture sample operation that uses non-normalized coordinates, or texture offsets
  /// that the host reports as unsupported, into a sequence of operations without those requirements.
  /// </summary>
  /// <param name="node">List node that holds the texture sample operation</param>
  /// <param name="config">Shader configuration, used for host queries and to record used features and textures</param>
  /// <returns>
  /// <paramref name="node"/> itself when nothing has to be rewritten,
  /// otherwise the node of the new texture sample operation that replaced it
  /// </returns>
  private static LinkedListNode<INode> RewriteTextureSample(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
      bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;

      bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();

      bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

      bool isCoordNormalized = isBindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot);

      if (isCoordNormalized && !hasInvalidOffset)
      {
          return node;
      }

      bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
      bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
      bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
      bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
      bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;

      bool isArray = (texOp.Type & SamplerType.Array) != 0;
      bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
      bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
      bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;

      // Bindless and indexed accesses carry one extra source before the coordinates.
      bool hasLeadingSource = isBindless || isIndexed;

      int coordsCount = texOp.Type.GetDimensions();
      int offsetsCount = hasOffsets ? coordsCount * 4 : (hasOffset ? coordsCount : 0);

      Operand[] offsets = new Operand[offsetsCount];
      Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount];

      int coordsIndex = hasLeadingSource ? 1 : 0;

      // Sources of the Lod query: the leading source (when present) followed by the coordinates.
      Operand[] lodSources = new Operand[coordsIndex + coordsCount];

      for (int i = 0; i < lodSources.Length; i++)
      {
          lodSources[i] = texOp.GetSource(i);
      }

      // Amount of sources located before the offsets, those are copied unchanged.
      int copyCount = coordsIndex + coordsCount;

      if (isArray)
      {
          copyCount++;
      }

      if (isShadow)
      {
          copyCount++;
      }

      if (hasDerivatives)
      {
          copyCount += coordsCount * 2;
      }

      if (isMultisample || hasLodLevel)
      {
          copyCount++;
      }

      int readIndex = 0;
      int writeIndex = 0;

      while (writeIndex < copyCount)
      {
          sources[writeIndex++] = texOp.GetSource(readIndex++);
      }

      bool allOffsetsConstant = true;

      for (int i = 0; i < offsetsCount; i++)
      {
          Operand offsetSource = texOp.GetSource(readIndex++);

          if (offsetSource.Type != OperandType.Constant)
          {
              allOffsetsConstant = false;
          }

          offsets[i] = offsetSource;
      }

      // Offsets only need to be emulated when at least one of them is not a constant.
      hasInvalidOffset = hasInvalidOffset && !allOffsetsConstant;

      if (isCoordNormalized && !hasInvalidOffset)
      {
          return node;
      }

      if (hasLodBias)
      {
          sources[writeIndex++] = texOp.GetSource(readIndex++);
      }

      if (isGather && !isShadow)
      {
          sources[writeIndex++] = texOp.GetSource(readIndex++);
      }

      int componentIndex = texOp.Index;

      // Inserts an operation right before the texture operation being rewritten.
      void InsertBefore(INode newOperation)
      {
          node.List.AddBefore(node, newOperation);
      }

      Operand ToInt(Operand value)
      {
          Operand converted = Local();

          InsertBefore(new Operation(Instruction.ConvertFP32ToS32, converted, value));

          return converted;
      }

      Operand ToFloat(Operand value)
      {
          Operand converted = Local();

          InsertBefore(new Operation(Instruction.ConvertS32ToFP32, converted, value));

          return converted;
      }

      // Sources of a texture size query for the given level of detail.
      Operand[] BuildSizeSources(Operand lodOperand)
      {
          return hasLeadingSource
              ? new Operand[] { sources[0], lodOperand }
              : new Operand[] { lodOperand };
      }

      // Inserts a texture size query for one component, and records the texture as used.
      void EmitTextureSize(int component, Operand dest, Operand[] sizeSources)
      {
          InsertBefore(new TextureOperation(
              Instruction.TextureSize,
              texOp.Type,
              texOp.Format,
              texOp.Flags,
              texOp.CbufSlot,
              texOp.Handle,
              component,
              dest,
              sizeSources));

          config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
      }

      // Emulate non-normalized coordinates by normalizing the coordinates on the shader.
      // Without normalization, the coordinates are expected to be in the [0, W or H] range,
      // and otherwise, they are expected to be in the [0, 1] range.
      // We normalize by dividing the coords by the texture size.
      if (!isCoordNormalized && !intCoords)
      {
          config.SetUsedFeature(FeatureFlags.IntegerSampling);

          bool isCube = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube;
          int normCoordsCount = isCube ? 2 : coordsCount;

          for (int i = 0; i < normCoordsCount; i++)
          {
              Operand coordSize = Local();

              EmitTextureSize(i, coordSize, BuildSizeSources(Const(0)));

              int sourceIndex = coordsIndex + i;

              Operand coord = sources[sourceIndex];
              Operand coordNormalized = Local();

              InsertBefore(new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, coord, ToFloat(coordSize)));

              sources[sourceIndex] = coordNormalized;
          }
      }

      // Technically, non-constant texture offsets are not allowed (according to the spec),
      // however some GPUs do support that.
      // For GPUs where it is not supported, we can replace the instruction with the following:
      // For texture*Offset, we replace it by texture*, and add the offset to the P coords.
      // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
      // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
      // For textureGatherOffset, we take advantage of the fact that the operation is already broken down
      // to read the 4 pixels separately, and just replace it with 4 textureGather with a different offset
      // for each pixel.
      if (hasInvalidOffset)
      {
          if (intCoords)
          {
              for (int i = 0; i < coordsCount; i++)
              {
                  int sourceIndex = coordsIndex + i;

                  Operand coord = sources[sourceIndex];
                  Operand coordPlusOffset = Local();

                  InsertBefore(new Operation(Instruction.Add, coordPlusOffset, coord, offsets[i]));

                  sources[sourceIndex] = coordPlusOffset;
              }
          }
          else
          {
              config.SetUsedFeature(FeatureFlags.IntegerSampling);

              Operand lod = Local();

              InsertBefore(new TextureOperation(
                  Instruction.Lod,
                  texOp.Type,
                  texOp.Format,
                  texOp.Flags,
                  texOp.CbufSlot,
                  texOp.Handle,
                  0,
                  lod,
                  lodSources));

              // With per-pixel offsets, the operation index picks the group of offsets to use.
              int firstOffset = hasOffsets ? texOp.Index * coordsCount : 0;

              for (int i = 0; i < coordsCount; i++)
              {
                  Operand coordSize = Local();

                  EmitTextureSize(i, coordSize, BuildSizeSources(ToInt(lod)));

                  Operand offset = Local();
                  Operand intOffset = offsets[firstOffset + i];

                  InsertBefore(new Operation(Instruction.FP32 | Instruction.Divide, offset, ToFloat(intOffset), ToFloat(coordSize)));

                  int sourceIndex = coordsIndex + i;

                  Operand coord = sources[sourceIndex];
                  Operand coordPlusOffset = Local();

                  InsertBefore(new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, coord, offset));

                  sources[sourceIndex] = coordPlusOffset;
              }
          }

          if (isGather && !isShadow)
          {
              // The gather component is the last source that was copied above.
              Operand gatherComponent = sources[writeIndex - 1];

              Debug.Assert(gatherComponent.Type == OperandType.Constant);

              componentIndex = gatherComponent.Value;
          }
      }

      TextureOperation newTexOp = new TextureOperation(
          Instruction.TextureSample,
          texOp.Type,
          texOp.Format,
          texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
          texOp.CbufSlot,
          texOp.Handle,
          componentIndex,
          texOp.Dest,
          sources);

      // Clear the sources of the operation that is going away.
      for (int i = 0; i < texOp.SourcesCount; i++)
      {
          texOp.SetSource(i, null);
      }

      LinkedList<INode> list = node.List;
      LinkedListNode<INode> newNode = list.AddBefore(node, newTexOp);

      list.Remove(node);

      return newNode;
  }
  /// <summary>
  /// Inserts operations after a texture sample that convert the sampled value to float and scale it
  /// by the inverse of the maximum positive value of the SNORM format bound to the texture.
  /// All operations that used the sampled value are changed to use the scaled value instead.
  /// </summary>
  /// <param name="node">List node that holds the texture operation</param>
  /// <param name="config">Shader configuration, used to query the texture format</param>
  /// <returns>
  /// <paramref name="node"/> itself for bindless textures and formats that are not handled,
  /// otherwise the node of the inserted multiply operation
  /// </returns>
  private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      // The format of a bindless texture can't be queried,
      // the handle is unknown so it could have any format.
      if (texOp.Flags.HasFlag(TextureFlags.Bindless))
      {
          return node;
      }

      TextureFormat format = config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot);

      // Largest positive value of one component, or 0 for anything that is not a handled SNORM format.
      int maxPositive = format switch
      {
          TextureFormat.R8Snorm or
          TextureFormat.R8G8Snorm or
          TextureFormat.R8G8B8A8Snorm => sbyte.MaxValue,
          TextureFormat.R16Snorm or
          TextureFormat.R16G16Snorm or
          TextureFormat.R16G16B16A16Snorm => short.MaxValue,
          _ => 0
      };

      if (maxPositive == 0)
      {
          // Not a SNORM format, nothing to do.
          return node;
      }

      // Do normalization. We assume SINT formats are being used
      // as replacement for SNORM (which is not supported).
      Operand sampled = texOp.Dest;

      // Snapshot of the users taken before the conversion below becomes a user itself.
      INode[] users = sampled.UseOps.ToArray();

      Operation toFloat = new Operation(Instruction.ConvertS32ToFP32, Local(), sampled);
      Operation scale = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), toFloat.Dest, ConstF(1f / maxPositive));

      LinkedListNode<INode> toFloatNode = node.List.AddAfter(node, toFloat);
      LinkedListNode<INode> scaleNode = toFloatNode.List.AddAfter(toFloatNode, scale);

      // Point all previous users of the sampled value to the normalized value.
      foreach (Operation user in users.OfType<Operation>())
      {
          for (int i = 0; i < user.SourcesCount; i++)
          {
              if (user.GetSource(i) == texOp.Dest)
              {
                  user.SetSource(i, scale.Dest);
              }
          }
      }

      return scaleNode;
  }
  /// <summary>
  /// Replaces the sources of an operation that read the base vertex, base instance or draw index
  /// from constant buffer 0 with the matching attribute operand.
  /// </summary>
  /// <param name="operation">Operation whose sources are inspected and replaced in place</param>
  /// <returns>True if at least one source was replaced, false otherwise</returns>
  private static bool ReplaceConstantBufferWithDrawParameters(Operation operation)
  {
      bool anyReplaced = false;

      for (int i = 0; i < operation.SourcesCount; i++)
      {
          Operand src = operation.GetSource(i);

          // Only reads from constant buffer 0 are candidates.
          if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
          {
              continue;
          }

          // The constants are byte offsets, while the operand offset is compared against them divided by 4.
          Operand replacement = src.GetCbufOffset() switch
          {
              Constants.NvnBaseVertexByteOffset / 4 => Attribute(AttributeConsts.BaseVertex),
              Constants.NvnBaseInstanceByteOffset / 4 => Attribute(AttributeConsts.BaseInstance),
              Constants.NvnDrawIndexByteOffset / 4 => Attribute(AttributeConsts.DrawIndex),
              _ => null
          };

          if (replacement != null)
          {
              operation.SetSource(i, replacement);
              anyReplaced = true;
          }
      }

      return anyReplaced;
  }
  /// <summary>
  /// Checks if any source of an operation reads the base vertex, base instance or draw index
  /// from constant buffer 0.
  /// </summary>
  /// <param name="operation">Operation whose sources are inspected</param>
  /// <returns>True if such a source exists, false otherwise</returns>
  private static bool HasConstantBufferDrawParameters(Operation operation)
  {
      for (int i = 0; i < operation.SourcesCount; i++)
      {
          Operand src = operation.GetSource(i);

          // Only reads from constant buffer 0 are candidates.
          if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
          {
              continue;
          }

          var cbufOffset = src.GetCbufOffset();

          // The constants are byte offsets, while the operand offset is compared against them divided by 4.
          bool isDrawParameter =
              cbufOffset == Constants.NvnBaseVertexByteOffset / 4 ||
              cbufOffset == Constants.NvnBaseInstanceByteOffset / 4 ||
              cbufOffset == Constants.NvnDrawIndexByteOffset / 4;

          if (isDrawParameter)
          {
              return true;
          }
      }

      return false;
  }
  491. }
  492. }