// Rewriter.cs (22 KB)
  1. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Linq;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
  7. namespace Ryujinx.Graphics.Shader.Translation
  8. {
  9. static class Rewriter
  10. {
  /// <summary>
  /// Visits every operation of every basic block and applies the rewrites implemented by this class:
  /// draw parameter detection/replacement on vertex shaders, global memory access rewriting,
  /// texture sample rewriting and SNORM normalization of texture buffer samples.
  /// </summary>
  /// <param name="blocks">Basic blocks whose operation lists are rewritten in place</param>
  /// <param name="config">Shader configuration queried for the stage and GPU state, and updated with used features</param>
  public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
  {
      bool vertexStage = config.Stage == ShaderStage.Vertex;
      bool cbufHoldsDrawParams = config.GpuAccessor.QueryHasConstantBufferDrawParameters();

      foreach (BasicBlock block in blocks)
      {
          LinkedListNode<INode> node = block.Operations.First;

          while (node != null)
          {
              if (node.Value is Operation operation)
              {
                  // A rewrite may replace the node, iteration resumes from the node it returns.
                  node = Visit(node, operation);
              }

              node = node.Next;
          }
      }

      // Applies all rewrites to a single operation and returns the node to continue from.
      LinkedListNode<INode> Visit(LinkedListNode<INode> current, Operation operation)
      {
          if (vertexStage)
          {
              // With draw parameters on the constant buffer the reads are replaced,
              // otherwise their presence is only detected.
              bool touchesDrawParams = cbufHoldsDrawParams
                  ? ReplaceConstantBufferWithDrawParameters(operation)
                  : HasConstantBufferDrawParameters(operation);

              if (touchesDrawParams)
              {
                  config.SetUsedFeature(FeatureFlags.DrawParameters);
              }
          }

          if (UsesGlobalMemory(operation.Inst))
          {
              current = RewriteGlobalAccess(current, config);
          }

          if (operation is TextureOperation texOp && texOp.Inst == Instruction.TextureSample)
          {
              current = RewriteTextureSample(current, config);

              if (texOp.Type == SamplerType.TextureBuffer)
              {
                  current = InsertSnormNormalization(current, config);
              }
          }

          return current;
      }
  }
  /// <summary>
  /// Replaces the global memory operation held by <paramref name="node"/> with a storage buffer operation.
  /// For every slot below StorageMaxCount, code is emitted before the node that tests the accessed address
  /// against the base address and size read from constant buffer 0, selecting the matching slot and base.
  /// </summary>
  /// <param name="node">List node holding the global memory operation</param>
  /// <param name="config">Shader configuration, used to mark storage buffers as used and to query the host alignment</param>
  /// <returns>The node holding the new storage operation, which takes the place of the original node</returns>
  private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
  {
      Operation globalOp = (Operation)node.Value;
      Instruction globalInst = globalOp.Inst;

      bool atomic = globalInst.IsAtomic();
      bool narrowStore = globalInst == Instruction.StoreGlobal16 || globalInst == Instruction.StoreGlobal8;
      bool writes = atomic || globalInst == Instruction.StoreGlobal || narrowStore;

      // Inserts a new operation right before the node being rewritten and returns its result.
      Operand Emit(Instruction inst, params Operand[] args)
      {
          Operand result = Local();

          node.List.AddBefore(node, new Operation(inst, result, args));

          return result;
      }

      Operand addrLow = globalOp.GetSource(0);
      Operand addrHigh = globalOp.GetSource(1);

      // Both default to zero when the address does not fall inside any buffer.
      Operand matchedBaseLow = Const(0);
      Operand matchedSlot = Const(0);

      for (int slot = 0; slot < StorageMaxCount; slot++)
      {
          config.SetUsedStorageBuffer(slot, writes);

          int descriptorOffset = GetStorageCbOffset(config.Stage, slot);

          Operand slotBaseLow = Cbuf(0, descriptorOffset);
          Operand slotBaseHigh = Cbuf(0, descriptorOffset + 1);
          Operand slotSize = Cbuf(0, descriptorOffset + 2);

          Operand delta = Emit(Instruction.Subtract, addrLow, slotBaseLow);

          // NOTE(review): adding the comparison result to the high word presumably relies on
          // "true" being encoded as -1, which propagates the borrow of the subtraction above - confirm.
          Operand borrow = Emit(Instruction.CompareLessU32, addrLow, slotBaseLow);

          Operand lowOk = Emit(Instruction.CompareLessU32, delta, slotSize);

          Operand highOk = Emit(
              Instruction.CompareEqual,
              Emit(Instruction.Add, addrHigh, borrow),
              slotBaseHigh);

          Operand hit = Emit(Instruction.BitwiseAnd, lowOk, highOk);

          matchedBaseLow = Emit(Instruction.ConditionalSelect, hit, slotBaseLow, matchedBaseLow);
          matchedSlot = Emit(Instruction.ConditionalSelect, hit, Const(slot), matchedSlot);
      }

      // The offset is taken relative to the base address masked with the negated host alignment.
      Operand alignedBase = Emit(
          Instruction.BitwiseAnd,
          matchedBaseLow,
          Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()));

      Operand byteOffset = Emit(Instruction.Subtract, addrLow, alignedBase);

      int argCount = globalOp.SourcesCount;

      Operand[] storageArgs = new Operand[argCount];

      storageArgs[0] = matchedSlot;

      // 8 and 16 bits stores take the byte offset, everything else takes the offset shifted right by 2.
      storageArgs[1] = narrowStore
          ? byteOffset
          : Emit(Instruction.ShiftRightU32, byteOffset, Const(2));

      for (int i = 2; i < argCount; i++)
      {
          storageArgs[i] = globalOp.GetSource(i);
      }

      Operation storageOp;

      if (atomic)
      {
          // Same atomic instruction, with the memory region bits switched to storage.
          storageOp = new Operation((globalInst & ~Instruction.MrMask) | Instruction.MrStorage, globalOp.Dest, storageArgs);
      }
      else if (globalInst == Instruction.LoadGlobal)
      {
          storageOp = new Operation(Instruction.LoadStorage, globalOp.Dest, storageArgs);
      }
      else
      {
          Instruction storeInst;

          switch (globalInst)
          {
              case Instruction.StoreGlobal16:
                  storeInst = Instruction.StoreStorage16;
                  break;
              case Instruction.StoreGlobal8:
                  storeInst = Instruction.StoreStorage8;
                  break;
              default:
                  storeInst = Instruction.StoreStorage;
                  break;
          }

          storageOp = new Operation(storeInst, null, storageArgs);
      }

      // Clear the sources of the operation that is about to be removed.
      for (int i = 0; i < globalOp.SourcesCount; i++)
      {
          globalOp.SetSource(i, null);
      }

      LinkedListNode<INode> replacement = node.List.AddBefore(node, storageOp);

      replacement.List.Remove(node);

      return replacement;
  }
  /// <summary>
  /// Rewrites a texture sample operation so that non-normalized coordinates are normalized on the shader,
  /// and so that non-constant texture offsets are emulated when the host does not support them.
  /// </summary>
  /// <remarks>
  /// The offset operands and the offset flags are only removed from the operation when the offsets
  /// have to be emulated. When the operation is rebuilt just to normalize its coordinates, the offsets
  /// are kept, so they are still applied by the sampling instruction.
  /// </remarks>
  /// <param name="node">List node holding the texture sample operation</param>
  /// <param name="config">Shader configuration, used to query the GPU accessor and to record used features and textures</param>
  /// <returns>The node holding the rewritten operation, or <paramref name="node"/> itself when no rewrite was needed</returns>
  private static LinkedListNode<INode> RewriteTextureSample(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
      bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;

      bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();

      bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

      bool isCoordNormalized = isBindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot);

      if (!hasInvalidOffset && isCoordNormalized)
      {
          return node;
      }

      bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
      bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
      bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
      bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
      bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;

      bool isArray = (texOp.Type & SamplerType.Array) != 0;
      bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
      bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
      bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;

      int coordsCount = texOp.Type.GetDimensions();

      int offsetsCount;

      if (hasOffsets)
      {
          offsetsCount = coordsCount * 4;
      }
      else if (hasOffset)
      {
          offsetsCount = coordsCount;
      }
      else
      {
          offsetsCount = 0;
      }

      // Bindless and indexed operations carry one extra source before the coordinates.
      int coordsIndex = isBindless || isIndexed ? 1 : 0;

      // The LOD query takes the leading source (if any) and the coordinates only.
      Operand[] lodSources = new Operand[coordsIndex + coordsCount];

      for (int index = 0; index < lodSources.Length; index++)
      {
          lodSources[index] = texOp.GetSource(index);
      }

      // Number of sources that come before the offsets.
      int copyCount = coordsIndex + coordsCount;

      if (isArray)
      {
          copyCount++;
      }

      if (isShadow)
      {
          copyCount++;
      }

      if (hasDerivatives)
      {
          copyCount += coordsCount * 2;
      }

      if (isMultisample)
      {
          copyCount++;
      }
      else if (hasLodLevel)
      {
          copyCount++;
      }

      Operand[] offsets = new Operand[offsetsCount];

      bool areAllOffsetsConstant = true;

      for (int index = 0; index < offsetsCount; index++)
      {
          Operand offset = texOp.GetSource(copyCount + index);

          areAllOffsetsConstant &= offset.Type == OperandType.Constant;

          offsets[index] = offset;
      }

      // Constant offsets never need to be emulated.
      hasInvalidOffset &= !areAllOffsetsConstant;

      if (!hasInvalidOffset && isCoordNormalized)
      {
          return node;
      }

      // Offsets are dropped from the new operation only when they are emulated below,
      // otherwise they are passed through untouched at their original position.
      int keptOffsetsCount = hasInvalidOffset ? 0 : offsetsCount;

      Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount + keptOffsetsCount];

      int srcIndex = 0;
      int dstIndex = 0;

      for (int index = 0; index < copyCount; index++)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      for (int index = 0; index < keptOffsetsCount; index++)
      {
          sources[dstIndex++] = offsets[index];
      }

      srcIndex += offsetsCount;

      if (hasLodBias)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      if (isGather && !isShadow)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      int componentIndex = texOp.Index;

      Operand Int(Operand value)
      {
          Operand res = Local();

          node.List.AddBefore(node, new Operation(Instruction.ConvertFP32ToS32, res, value));

          return res;
      }

      Operand Float(Operand value)
      {
          Operand res = Local();

          node.List.AddBefore(node, new Operation(Instruction.ConvertS32ToFP32, res, value));

          return res;
      }

      // Emulate non-normalized coordinates by normalizing the coordinates on the shader.
      // Without normalization, the coordinates are expected to be in the [0, W or H] range,
      // and otherwise, they are expected to be in the [0, 1] range.
      // We normalize by dividing the coords by the texture size.
      if (!isCoordNormalized && !intCoords)
      {
          config.SetUsedFeature(FeatureFlags.IntegerSampling);

          int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;

          for (int index = 0; index < normCoordsCount; index++)
          {
              Operand coordSize = Local();

              Operand[] texSizeSources;

              if (isBindless || isIndexed)
              {
                  texSizeSources = new Operand[] { sources[0], Const(0) };
              }
              else
              {
                  texSizeSources = new Operand[] { Const(0) };
              }

              node.List.AddBefore(node, new TextureOperation(
                  Instruction.TextureSize,
                  texOp.Type,
                  texOp.Format,
                  texOp.Flags,
                  texOp.CbufSlot,
                  texOp.Handle,
                  index,
                  coordSize,
                  texSizeSources));

              config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);

              Operand source = sources[coordsIndex + index];

              Operand coordNormalized = Local();

              node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, source, Float(coordSize)));

              sources[coordsIndex + index] = coordNormalized;
          }
      }

      // Technically, non-constant texture offsets are not allowed (according to the spec),
      // however some GPUs do support that.
      // For GPUs where it is not supported, we can replace the instruction with the following:
      // For texture*Offset, we replace it by texture*, and add the offset to the P coords.
      // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
      // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
      // For textureGatherOffset, we take advantage of the fact that the operation is already broken down
      // to read the 4 pixels separately, and just replace it with 4 textureGather with a different offset
      // for each pixel.
      if (hasInvalidOffset)
      {
          if (intCoords)
          {
              for (int index = 0; index < coordsCount; index++)
              {
                  Operand source = sources[coordsIndex + index];

                  Operand coordPlusOffset = Local();

                  node.List.AddBefore(node, new Operation(Instruction.Add, coordPlusOffset, source, offsets[index]));

                  sources[coordsIndex + index] = coordPlusOffset;
              }
          }
          else
          {
              config.SetUsedFeature(FeatureFlags.IntegerSampling);

              Operand lod = Local();

              node.List.AddBefore(node, new TextureOperation(
                  Instruction.Lod,
                  texOp.Type,
                  texOp.Format,
                  texOp.Flags,
                  texOp.CbufSlot,
                  texOp.Handle,
                  0,
                  lod,
                  lodSources));

              for (int index = 0; index < coordsCount; index++)
              {
                  Operand coordSize = Local();

                  Operand[] texSizeSources;

                  if (isBindless || isIndexed)
                  {
                      texSizeSources = new Operand[] { sources[0], Int(lod) };
                  }
                  else
                  {
                      texSizeSources = new Operand[] { Int(lod) };
                  }

                  node.List.AddBefore(node, new TextureOperation(
                      Instruction.TextureSize,
                      texOp.Type,
                      texOp.Format,
                      texOp.Flags,
                      texOp.CbufSlot,
                      texOp.Handle,
                      index,
                      coordSize,
                      texSizeSources));

                  config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);

                  Operand offset = Local();

                  // With per-texel offsets, pick the group of offsets that belongs to this operation's index.
                  Operand intOffset = offsets[index + (hasOffsets ? texOp.Index * coordsCount : 0)];

                  node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(coordSize)));

                  Operand source = sources[coordsIndex + index];

                  Operand coordPlusOffset = Local();

                  node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));

                  sources[coordsIndex + index] = coordPlusOffset;
              }
          }

          if (isGather && !isShadow)
          {
              // The gather component is the last source that was copied.
              Operand gatherComponent = sources[dstIndex - 1];

              Debug.Assert(gatherComponent.Type == OperandType.Constant);

              componentIndex = gatherComponent.Value;
          }
      }

      // The offset flags must stay set whenever the offset operands were kept in the sources.
      TextureFlags newFlags = hasInvalidOffset
          ? texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets)
          : texOp.Flags;

      TextureOperation newTexOp = new TextureOperation(
          Instruction.TextureSample,
          texOp.Type,
          texOp.Format,
          newFlags,
          texOp.CbufSlot,
          texOp.Handle,
          componentIndex,
          texOp.Dest,
          sources);

      // Clear the sources of the operation that is about to be removed.
      for (int index = 0; index < texOp.SourcesCount; index++)
      {
          texOp.SetSource(index, null);
      }

      LinkedListNode<INode> oldNode = node;

      node = node.List.AddBefore(node, newTexOp);

      node.List.Remove(oldNode);

      return node;
  }
  /// <summary>
  /// Inserts a conversion of the sampled value to float, followed by a multiplication by the reciprocal of
  /// the maximum positive value of the format, when the texture queried for the operation has a SNORM format.
  /// Operations that were using the sampled value are changed to use the normalized value instead.
  /// </summary>
  /// <param name="node">List node holding the texture operation</param>
  /// <param name="config">Shader configuration, used to query the texture format</param>
  /// <returns>The node of the last inserted operation, or <paramref name="node"/> itself when nothing was inserted</returns>
  private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      // The format of a bindless texture can't be queried: the handle is unknown,
      // so the texture could have any format.
      if (texOp.Flags.HasFlag(TextureFlags.Bindless))
      {
          return node;
      }

      // Largest positive integer of the SNORM format, or 0 for every other format.
      int maxPositive = config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot) switch
      {
          TextureFormat.R8Snorm or
          TextureFormat.R8G8Snorm or
          TextureFormat.R8G8B8A8Snorm => sbyte.MaxValue,

          TextureFormat.R16Snorm or
          TextureFormat.R16G16Snorm or
          TextureFormat.R16G16B16A16Snorm => short.MaxValue,

          _ => 0
      };

      if (maxPositive == 0)
      {
          // Not a SNORM format, nothing to normalize.
          return node;
      }

      // Normalize, assuming that a SINT format is being used
      // as replacement for SNORM (which is not supported).

      // The current users are captured first, before the conversion below is created with the texel as source.
      INode[] previousUses = texOp.Dest.UseOps.ToArray();

      Operation toFloat = new Operation(Instruction.ConvertS32ToFP32, Local(), texOp.Dest);
      Operation scale = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), toFloat.Dest, ConstF(1f / maxPositive));

      LinkedListNode<INode> toFloatNode = node.List.AddAfter(node, toFloat);

      node = toFloatNode.List.AddAfter(toFloatNode, scale);

      // Point every previous use of the texel value at the normalized value.
      foreach (Operation user in previousUses.OfType<Operation>())
      {
          int sourcesCount = 0;

          while (sourcesCount < user.SourcesCount)
          {
              if (user.GetSource(sourcesCount) == texOp.Dest)
              {
                  user.SetSource(sourcesCount, scale.Dest);
              }

              sourcesCount++;
          }
      }

      return node;
  }
  /// <summary>
  /// Replaces sources of the operation that read the base vertex, base instance or draw index
  /// from constant buffer slot 0 with the matching attribute.
  /// </summary>
  /// <param name="operation">Operation whose sources are inspected and updated in place</param>
  /// <returns>True if at least one source was replaced, false otherwise</returns>
  private static bool ReplaceConstantBufferWithDrawParameters(Operation operation)
  {
      bool anyReplaced = false;

      for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
      {
          Operand src = operation.GetSource(srcIndex);

          if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
          {
              continue;
          }

          // The constants are byte offsets, while the operand offset is compared against them divided by 4.
          Operand replacement = src.GetCbufOffset() switch
          {
              Constants.NvnBaseVertexByteOffset / 4 => Attribute(AttributeConsts.BaseVertex),
              Constants.NvnBaseInstanceByteOffset / 4 => Attribute(AttributeConsts.BaseInstance),
              Constants.NvnDrawIndexByteOffset / 4 => Attribute(AttributeConsts.DrawIndex),
              _ => null
          };

          if (replacement is not null)
          {
              operation.SetSource(srcIndex, replacement);
              anyReplaced = true;
          }
      }

      return anyReplaced;
  }
  /// <summary>
  /// Checks if any source of the operation reads the base vertex, base instance or draw index
  /// from constant buffer slot 0.
  /// </summary>
  /// <param name="operation">Operation whose sources are inspected</param>
  /// <returns>True if such a source was found, false otherwise</returns>
  private static bool HasConstantBufferDrawParameters(Operation operation)
  {
      // The constants are byte offsets, while the operand offset is compared against them divided by 4.
      static bool IsDrawParameterOffset(int cbufOffset)
      {
          return cbufOffset == Constants.NvnBaseVertexByteOffset / 4
              || cbufOffset == Constants.NvnBaseInstanceByteOffset / 4
              || cbufOffset == Constants.NvnDrawIndexByteOffset / 4;
      }

      int srcIndex = 0;

      while (srcIndex < operation.SourcesCount)
      {
          Operand src = operation.GetSource(srcIndex++);

          bool readsSlotZero = src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0;

          if (readsSlotZero && IsDrawParameterOffset(src.GetCbufOffset()))
          {
              return true;
          }
      }

      return false;
  }
  467. }
  468. }