Rewriter.cs 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Linq;
  5. using System.Numerics;
  6. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  7. using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
  8. namespace Ryujinx.Graphics.Shader.Translation
  9. {
  10. static class Rewriter
  11. {
  /// <summary>
  /// Visits every operation in every basic block and applies the rewrites implemented by this class:
  /// draw parameter detection/replacement (vertex stage only), global memory access rewriting,
  /// texture sample rewriting and SNORM normalization of buffer texture samples.
  /// </summary>
  /// <param name="blocks">Basic blocks whose operation lists are modified in place.</param>
  /// <param name="config">Shader configuration; queried for the stage and host capabilities and updated with the features in use.</param>
  public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
  {
      // These answers do not depend on the operation being visited, so they are queried once.
      bool vertexStage = config.Stage == ShaderStage.Vertex;
      bool canReplaceDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters();
      bool hostHasSnormBufferFormats = config.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat();

      foreach (BasicBlock block in blocks)
      {
          // The rewrite helpers may replace the current node, so the cursor is always
          // reassigned from their return value before moving on to the next node.
          for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
          {
              if (node.Value is Operation operation)
              {
                  if (vertexStage)
                  {
                      // Either substitute the draw parameters, or just detect that they are read.
                      bool usesDrawParameters = canReplaceDrawParameters
                          ? ReplaceConstantBufferWithDrawParameters(operation)
                          : HasConstantBufferDrawParameters(operation);

                      if (usesDrawParameters)
                      {
                          config.SetUsedFeature(FeatureFlags.DrawParameters);
                      }
                  }

                  if (UsesGlobalMemory(operation.Inst))
                  {
                      node = RewriteGlobalAccess(node, config);
                  }

                  if (operation is TextureOperation texOp && texOp.Inst == Instruction.TextureSample)
                  {
                      node = RewriteTextureSample(node, config);

                      bool samplesBufferTexture = texOp.Type == SamplerType.TextureBuffer;

                      if (samplesBufferTexture && !hostHasSnormBufferFormats)
                      {
                          node = InsertSnormNormalization(node, config);
                      }
                  }
              }
          }
      }
  }
  /// <summary>
  /// Replaces a global memory operation with the equivalent storage buffer operation.
  /// Code that selects the storage buffer containing the accessed address is inserted before the node.
  /// </summary>
  /// <param name="node">List node holding the global memory operation.</param>
  /// <param name="config">Shader configuration; supplies the accessible storage buffer mask and records the buffers used.</param>
  /// <returns>The list node holding the new storage operation, which takes the place of <paramref name="node"/>.</returns>
  private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
  {
      Operation globalOp = (Operation)node.Value;

      bool atomic = globalOp.Inst.IsAtomic();
      bool narrowStore = globalOp.Inst == Instruction.StoreGlobal16 || globalOp.Inst == Instruction.StoreGlobal8;
      bool writes = atomic || globalOp.Inst == Instruction.StoreGlobal || narrowStore;

      // Inserts "result = inst(operands)" right before the operation being rewritten.
      Operand Emit(Instruction inst, params Operand[] operands)
      {
          Operand result = Local();

          node.List.AddBefore(node, new Operation(inst, result, operands));

          return result;
      }

      Operand addressLow = globalOp.GetSource(0);
      Operand addressHigh = globalOp.GetSource(1);

      // Both start at constant 0 and are overwritten by the select chain built below
      // whenever the address falls inside one of the candidate buffers.
      Operand matchedBaseLow = Const(0);
      Operand matchedSlot = Const(0);

      for (int pendingSlots = config.AccessibleStorageBuffersMask; pendingSlots != 0;)
      {
          int slot = BitOperations.TrailingZeroCount(pendingSlots);

          pendingSlots &= ~(1 << slot);

          config.SetUsedStorageBuffer(slot, writes);

          // Three consecutive constant buffer words are read per slot: base low, base high and size.
          int descriptorOffset = GetStorageCbOffset(config.Stage, slot);

          Operand baseLow = Cbuf(0, descriptorOffset);
          Operand baseHigh = Cbuf(0, descriptorOffset + 1);
          Operand bufferSize = Cbuf(0, descriptorOffset + 2);

          Operand relativeOffset = Emit(Instruction.Subtract, addressLow, baseLow);
          Operand lowBorrow = Emit(Instruction.CompareLessU32, addressLow, baseLow);

          Operand lowInRange = Emit(Instruction.CompareLessU32, relativeOffset, bufferSize);

          // NOTE(review): adding the comparison result presumably subtracts the borrow
          // (true being all bits set) - confirm against the comparison semantics.
          Operand highAfterBorrow = Emit(Instruction.Add, addressHigh, lowBorrow);

          Operand highInRange = Emit(Instruction.CompareEqual, highAfterBorrow, baseHigh);

          Operand insideBuffer = Emit(Instruction.BitwiseAnd, lowInRange, highInRange);

          matchedBaseLow = Emit(Instruction.ConditionalSelect, insideBuffer, baseLow, matchedBaseLow);
          matchedSlot = Emit(Instruction.ConditionalSelect, insideBuffer, Const(slot), matchedSlot);
      }

      // The offset is taken relative to the base address rounded down to the host offset alignment.
      Operand alignmentMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());

      Operand alignedBase = Emit(Instruction.BitwiseAnd, matchedBaseLow, alignmentMask);
      Operand byteOffset = Emit(Instruction.Subtract, addressLow, alignedBase);

      int sourcesCount = globalOp.SourcesCount;

      Operand[] sources = new Operand[sourcesCount];

      sources[0] = matchedSlot;

      // 8 and 16-bit stores keep the byte offset, everything else uses the offset shifted right by 2.
      sources[1] = narrowStore
          ? byteOffset
          : Emit(Instruction.ShiftRightU32, byteOffset, Const(2));

      for (int i = 2; i < sourcesCount; i++)
      {
          sources[i] = globalOp.GetSource(i);
      }

      Operation replacement;

      if (atomic)
      {
          // Keep the atomic operation, but switch its memory region from global to storage.
          Instruction storageAtomic = (globalOp.Inst & ~Instruction.MrMask) | Instruction.MrStorage;

          replacement = new Operation(storageAtomic, globalOp.Dest, sources);
      }
      else
      {
          switch (globalOp.Inst)
          {
              case Instruction.LoadGlobal:
                  replacement = new Operation(Instruction.LoadStorage, globalOp.Dest, sources);
                  break;
              case Instruction.StoreGlobal16:
                  replacement = new Operation(Instruction.StoreStorage16, null, sources);
                  break;
              case Instruction.StoreGlobal8:
                  replacement = new Operation(Instruction.StoreStorage8, null, sources);
                  break;
              default:
                  replacement = new Operation(Instruction.StoreStorage, null, sources);
                  break;
          }
      }

      // Clear the sources of the operation that is about to be removed.
      for (int i = 0; i < globalOp.SourcesCount; i++)
      {
          globalOp.SetSource(i, null);
      }

      LinkedListNode<INode> replacementNode = node.List.AddBefore(node, replacement);

      replacementNode.List.Remove(node);

      return replacementNode;
  }
  /// <summary>
  /// Rewrites a texture sample operation that uses non-normalized coordinates and/or
  /// non-constant texture offsets that the host does not support, by inserting code before
  /// the node that normalizes the coordinates and folds the offsets into them.
  /// </summary>
  /// <param name="node">List node holding the texture sample operation.</param>
  /// <param name="config">Shader configuration; queried for host capabilities and updated with the features and textures used.</param>
  /// <returns>
  /// The list node holding the replacement texture operation, or <paramref name="node"/> itself
  /// when the operation needs no rewriting.
  /// </returns>
  private static LinkedListNode<INode> RewriteTextureSample(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      bool FlagSet(TextureFlags flag) => (texOp.Flags & flag) != 0;
      bool TypeBitSet(SamplerType bit) => (texOp.Type & bit) != 0;

      bool singleOffset = FlagSet(TextureFlags.Offset);
      bool perTexelOffsets = FlagSet(TextureFlags.Offsets);

      bool needsOffsetEmulation =
          (singleOffset || perTexelOffsets) &&
          !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();

      bool bindless = FlagSet(TextureFlags.Bindless);

      bool coordsNormalized = bindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot);

      if (coordsNormalized && !needsOffsetEmulation)
      {
          return node;
      }

      bool gather = FlagSet(TextureFlags.Gather);
      bool derivatives = FlagSet(TextureFlags.Derivatives);
      bool intCoords = FlagSet(TextureFlags.IntCoords);
      bool lodBias = FlagSet(TextureFlags.LodBias);
      bool lodLevel = FlagSet(TextureFlags.LodLevel);

      bool array = TypeBitSet(SamplerType.Array);
      bool indexed = TypeBitSet(SamplerType.Indexed);
      bool multisample = TypeBitSet(SamplerType.Multisample);
      bool shadow = TypeBitSet(SamplerType.Shadow);

      int coordsCount = texOp.Type.GetDimensions();

      // "Offsets" carries 4 offset vectors, "Offset" carries a single one.
      int offsetsCount = perTexelOffsets
          ? coordsCount * 4
          : (singleOffset ? coordsCount : 0);

      Operand[] offsets = new Operand[offsetsCount];
      Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount];

      // Bindless and indexed operations carry one extra source before the coordinates.
      int coordsIndex = (bindless || indexed) ? 1 : 0;

      // The LOD query takes the optional leading source followed by the coordinates.
      Operand[] lodSources = new Operand[coordsIndex + coordsCount];

      for (int i = 0; i < lodSources.Length; i++)
      {
          lodSources[i] = texOp.GetSource(i);
      }

      // Count the sources that precede the offsets on the original operation.
      int copyCount = lodSources.Length;

      if (array)
      {
          copyCount++;
      }

      if (shadow)
      {
          copyCount++;
      }

      if (derivatives)
      {
          copyCount += coordsCount * 2;
      }

      if (multisample || lodLevel)
      {
          copyCount++;
      }

      int srcIndex = 0;
      int dstIndex = 0;

      while (dstIndex < copyCount)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      bool anyNonConstantOffset = false;

      for (int i = 0; i < offsetsCount; i++)
      {
          Operand offsetSource = texOp.GetSource(srcIndex++);

          if (offsetSource.Type != OperandType.Constant)
          {
              anyNonConstantOffset = true;
          }

          offsets[i] = offsetSource;
      }

      // Emulation is only required when at least one offset is not a constant.
      needsOffsetEmulation = needsOffsetEmulation && anyNonConstantOffset;

      if (coordsNormalized && !needsOffsetEmulation)
      {
          return node;
      }

      if (lodBias)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      if (gather && !shadow)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      int componentIndex = texOp.Index;

      // Inserts a float to signed integer conversion before the node.
      Operand Int(Operand value)
      {
          Operand converted = Local();

          node.List.AddBefore(node, new Operation(Instruction.ConvertFP32ToS32, converted, value));

          return converted;
      }

      // Inserts a signed integer to float conversion before the node.
      Operand Float(Operand value)
      {
          Operand converted = Local();

          node.List.AddBefore(node, new Operation(Instruction.ConvertS32ToFP32, converted, value));

          return converted;
      }

      // Inserts a texture size query for one component before the node and returns its result.
      // When floatLod is null the query uses constant level 0, otherwise the level is floatLod
      // converted to integer (the conversion is inserted right before the query).
      Operand EmitTextureSize(int component, Operand floatLod)
      {
          Operand size = Local();

          Operand level = floatLod is null ? Const(0) : Int(floatLod);

          Operand[] sizeSources = coordsIndex != 0
              ? new Operand[] { sources[0], level }
              : new Operand[] { level };

          node.List.AddBefore(node, new TextureOperation(
              Instruction.TextureSize,
              texOp.Type,
              texOp.Format,
              texOp.Flags,
              texOp.CbufSlot,
              texOp.Handle,
              component,
              size,
              sizeSources));

          config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);

          return size;
      }

      // Emulate non-normalized coordinates by normalizing the coordinates on the shader.
      // Without normalization, the coordinates are expected to be in the [0, W or H] range,
      // and otherwise, they are expected to be in the [0, 1] range.
      // We normalize by dividing the coords by the texture size.
      if (!(coordsNormalized || intCoords))
      {
          config.SetUsedFeature(FeatureFlags.IntegerSampling);

          bool isCube = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube;

          int normCoordsCount = isCube ? 2 : coordsCount;

          for (int i = 0; i < normCoordsCount; i++)
          {
              Operand coordSize = EmitTextureSize(i, null);

              Operand coord = sources[coordsIndex + i];

              Operand normalizedCoord = Local();

              node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, normalizedCoord, coord, Float(coordSize)));

              sources[coordsIndex + i] = normalizedCoord;
          }
      }

      // Technically, non-constant texture offsets are not allowed (according to the spec),
      // however some GPUs do support that.
      // For GPUs where it is not supported, we can replace the instruction with the following:
      // For texture*Offset, we replace it by texture*, and add the offset to the P coords.
      // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
      // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
      // For textureGatherOffset, we take advantage of the fact that the operation is already broken down
      // to read the 4 pixels separately, and just replace it with 4 textureGather with a different offset
      // for each pixel.
      if (needsOffsetEmulation)
      {
          if (intCoords)
          {
              for (int i = 0; i < coordsCount; i++)
              {
                  Operand coord = sources[coordsIndex + i];

                  Operand shiftedCoord = Local();

                  node.List.AddBefore(node, new Operation(Instruction.Add, shiftedCoord, coord, offsets[i]));

                  sources[coordsIndex + i] = shiftedCoord;
              }
          }
          else
          {
              config.SetUsedFeature(FeatureFlags.IntegerSampling);

              Operand lod = Local();

              node.List.AddBefore(node, new TextureOperation(
                  Instruction.Lod,
                  texOp.Type,
                  texOp.Format,
                  texOp.Flags,
                  texOp.CbufSlot,
                  texOp.Handle,
                  0,
                  lod,
                  lodSources));

              // With "Offsets", the operation index selects which offset vector applies.
              int offsetBase = perTexelOffsets ? texOp.Index * coordsCount : 0;

              for (int i = 0; i < coordsCount; i++)
              {
                  Operand coordSize = EmitTextureSize(i, lod);

                  Operand scaledOffset = Local();

                  Operand intOffset = offsets[offsetBase + i];

                  node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, scaledOffset, Float(intOffset), Float(coordSize)));

                  Operand coord = sources[coordsIndex + i];

                  Operand shiftedCoord = Local();

                  node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, shiftedCoord, coord, scaledOffset));

                  sources[coordsIndex + i] = shiftedCoord;
              }
          }

          if (gather && !shadow)
          {
              // The gather component is the last source copied above; its value becomes the operation index.
              Operand gatherComponent = sources[dstIndex - 1];

              Debug.Assert(gatherComponent.Type == OperandType.Constant);

              componentIndex = gatherComponent.Value;
          }
      }

      // The replacement never carries offsets, they were folded into the coordinates above.
      TextureOperation replacement = new TextureOperation(
          Instruction.TextureSample,
          texOp.Type,
          texOp.Format,
          texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
          texOp.CbufSlot,
          texOp.Handle,
          componentIndex,
          texOp.Dest,
          sources);

      // Clear the sources of the operation that is about to be removed.
      for (int i = 0; i < texOp.SourcesCount; i++)
      {
          texOp.SetSource(i, null);
      }

      LinkedListNode<INode> replacementNode = node.List.AddBefore(node, replacement);

      replacementNode.List.Remove(node);

      return replacementNode;
  }
  /// <summary>
  /// Inserts code after a texture sample that converts the sampled integer value into a
  /// normalized float, for textures whose format is one of the SNORM formats handled here.
  /// </summary>
  /// <param name="node">List node holding the texture sample operation.</param>
  /// <param name="config">Shader configuration; used to query the texture format.</param>
  /// <returns>
  /// The list node of the last inserted operation, or <paramref name="node"/> itself when the
  /// texture is bindless or its format is not one of the handled SNORM formats.
  /// </returns>
  private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      // We can't query the format of a bindless texture,
      // because the handle is unknown, it can have any format.
      if (texOp.Flags.HasFlag(TextureFlags.Bindless))
      {
          return node;
      }

      // Largest positive value of the component type, or 0 for formats that are not SNORM.
      int maxPositive = config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot) switch
      {
          TextureFormat.R8Snorm or
          TextureFormat.R8G8Snorm or
          TextureFormat.R8G8B8A8Snorm => sbyte.MaxValue,
          TextureFormat.R16Snorm or
          TextureFormat.R16G16Snorm or
          TextureFormat.R16G16B16A16Snorm => short.MaxValue,
          _ => 0
      };

      if (maxPositive == 0)
      {
          return node;
      }

      // Do normalization. We assume SINT formats are being used
      // as replacement for SNORM (which is not supported).
      Operand rawTexel = texOp.Dest;

      // Snapshot the users first, so that the conversion inserted below
      // (which itself reads the raw value) is not redirected.
      INode[] existingUsers = rawTexel.UseOps.ToArray();

      Operation toFloat = new Operation(Instruction.ConvertS32ToFP32, Local(), rawTexel);
      Operation scale = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), toFloat.Dest, ConstF(1f / maxPositive));

      LinkedListNode<INode> toFloatNode = node.List.AddAfter(node, toFloat);
      LinkedListNode<INode> scaleNode = toFloatNode.List.AddAfter(toFloatNode, scale);

      // Replace all uses of the texture pixel value with the normalized value.
      foreach (Operation user in existingUsers.OfType<Operation>())
      {
          for (int i = 0; i < user.SourcesCount; i++)
          {
              if (user.GetSource(i) == rawTexel)
              {
                  user.SetSource(i, scale.Dest);
              }
          }
      }

      return scaleNode;
  }
  /// <summary>
  /// Replaces sources that read the base vertex, base instance or draw index words of
  /// constant buffer slot 0 with the matching draw parameter attributes.
  /// </summary>
  /// <param name="operation">Operation whose sources are inspected and modified in place.</param>
  /// <returns>True if at least one source was replaced, false otherwise.</returns>
  private static bool ReplaceConstantBufferWithDrawParameters(Operation operation)
  {
      bool replacedAny = false;

      for (int i = 0; i < operation.SourcesCount; i++)
      {
          Operand source = operation.GetSource(i);

          if (source.Type != OperandType.ConstantBuffer || source.GetCbufSlot() != 0)
          {
              continue;
          }

          Operand drawParameter;

          // The constants are byte offsets; they are divided by 4 before being compared.
          switch (source.GetCbufOffset())
          {
              case Constants.NvnBaseVertexByteOffset / 4:
                  drawParameter = Attribute(AttributeConsts.BaseVertex);
                  break;
              case Constants.NvnBaseInstanceByteOffset / 4:
                  drawParameter = Attribute(AttributeConsts.BaseInstance);
                  break;
              case Constants.NvnDrawIndexByteOffset / 4:
                  drawParameter = Attribute(AttributeConsts.DrawIndex);
                  break;
              default:
                  // Some other word of the constant buffer, leave the source untouched.
                  continue;
          }

          operation.SetSource(i, drawParameter);
          replacedAny = true;
      }

      return replacedAny;
  }
  /// <summary>
  /// Checks whether any source of the operation reads the base vertex, base instance or
  /// draw index words of constant buffer slot 0.
  /// </summary>
  /// <param name="operation">Operation whose sources are inspected.</param>
  /// <returns>True if such a source exists, false otherwise.</returns>
  private static bool HasConstantBufferDrawParameters(Operation operation)
  {
      for (int i = 0; i < operation.SourcesCount; i++)
      {
          Operand source = operation.GetSource(i);

          if (source.Type != OperandType.ConstantBuffer || source.GetCbufSlot() != 0)
          {
              continue;
          }

          // The constants are byte offsets; they are divided by 4 before being compared.
          var offset = source.GetCbufOffset();

          if (offset == Constants.NvnBaseVertexByteOffset / 4 ||
              offset == Constants.NvnBaseInstanceByteOffset / 4 ||
              offset == Constants.NvnDrawIndexByteOffset / 4)
          {
              return true;
          }
      }

      return false;
  }
  472. }
  473. }