Rewriter.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Linq;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
  7. namespace Ryujinx.Graphics.Shader.Translation
  8. {
  9. static class Rewriter
  10. {
  /// <summary>
  /// Walks every operation of every basic block and rewrites the ones that need it:
  /// global memory accesses, and texture sample operations.
  /// </summary>
  /// <param name="blocks">Basic blocks whose operation lists are rewritten in place</param>
  /// <param name="config">Shader configuration, forwarded to each rewrite step</param>
  public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
  {
      foreach (BasicBlock block in blocks)
      {
          LinkedListNode<INode> node = block.Operations.First;

          while (node != null)
          {
              // Only operations are rewritten, any other kind of node is stepped over.
              if (node.Value is Operation operation)
              {
                  if (UsesGlobalMemory(operation.Inst))
                  {
                      node = RewriteGlobalAccess(node, config);
                  }

                  if (operation is TextureOperation texOp && texOp.Inst == Instruction.TextureSample)
                  {
                      node = RewriteTextureSample(node, config);

                      if (texOp.Type == SamplerType.TextureBuffer)
                      {
                          node = InsertSnormNormalization(node, config);
                      }
                  }
              }

              // The rewrite steps return the node to continue from,
              // so the walk resumes right after whatever they inserted.
              node = node.Next;
          }
      }
  }
  /// <summary>
  /// Replaces a global memory operation with the matching storage buffer operation.
  /// </summary>
  /// <remarks>
  /// Code is emitted before the operation that tests the accessed address against the base address
  /// and size of each storage buffer slot (both read from constant buffer 0), and selects the slot
  /// and base address of the buffer that contains it. The original operation is then removed
  /// from the list and the storage operation takes its place.
  /// </remarks>
  /// <param name="node">List node holding the global memory operation</param>
  /// <param name="config">Shader configuration</param>
  /// <returns>The list node holding the new storage operation</returns>
  private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
  {
      Operation operation = (Operation)node.Value;

      bool isAtomic = operation.Inst.IsAtomic();
      bool isSubWordStore = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
      bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isSubWordStore;

      // Emits "temp = emitted(args)" right before the operation being rewritten, and returns temp.
      Operand Emit(Instruction emitted, params Operand[] args)
      {
          Operand temp = Local();

          node.List.AddBefore(node, new Operation(emitted, temp, args));

          return temp;
      }

      Operand addrLow = operation.GetSource(0);
      Operand addrHigh = operation.GetSource(1);

      // Both default to constant 0 when the address does not fall inside any buffer.
      Operand matchedBaseLow = Const(0);
      Operand matchedSlot = Const(0);

      for (int slot = 0; slot < StorageMaxCount; slot++)
      {
          config.SetUsedStorageBuffer(slot, isWrite);

          int cbOffset = GetStorageCbOffset(config.Stage, slot);

          Operand slotBaseLow = Cbuf(0, cbOffset);
          Operand slotBaseHigh = Cbuf(0, cbOffset + 1);
          Operand slotSize = Cbuf(0, cbOffset + 2);

          Operand delta = Emit(Instruction.Subtract, addrLow, slotBaseLow);

          // NOTE(review): adding the comparison result to the high word only works as a borrow
          // if a true comparison yields -1 -- confirm against the comparison semantics of the IR.
          Operand borrow = Emit(Instruction.CompareLessU32, addrLow, slotBaseLow);

          Operand lowInRange = Emit(Instruction.CompareLessU32, delta, slotSize);

          Operand highPlusBorrow = Emit(Instruction.Add, addrHigh, borrow);

          Operand highMatches = Emit(Instruction.CompareEqual, highPlusBorrow, slotBaseHigh);

          Operand hit = Emit(Instruction.BitwiseAnd, lowInRange, highMatches);

          // A later matching slot overrides an earlier one.
          matchedBaseLow = Emit(Instruction.ConditionalSelect, hit, slotBaseLow, matchedBaseLow);
          matchedSlot = Emit(Instruction.ConditionalSelect, hit, Const(slot), matchedSlot);
      }

      // The offset is taken relative to the base address rounded down to the host offset alignment.
      Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());

      Operand alignedBase = Emit(Instruction.BitwiseAnd, matchedBaseLow, alignMask);
      Operand byteOffset = Emit(Instruction.Subtract, addrLow, alignedBase);

      int sourcesCount = operation.SourcesCount;

      Operand[] storageSources = new Operand[sourcesCount];

      // The two address sources become slot + offset. 8 and 16-bit stores take
      // the byte offset as-is, everything else takes the byte offset shifted right by 2.
      storageSources[0] = matchedSlot;
      storageSources[1] = isSubWordStore
          ? byteOffset
          : Emit(Instruction.ShiftRightU32, byteOffset, Const(2));

      for (int i = 2; i < sourcesCount; i++)
      {
          storageSources[i] = operation.GetSource(i);
      }

      Operation storageOp;

      if (isAtomic)
      {
          // Same atomic operation, with the memory region bits replaced.
          Instruction atomicInst = (operation.Inst & ~Instruction.MrMask) | Instruction.MrStorage;

          storageOp = new Operation(atomicInst, operation.Dest, storageSources);
      }
      else
      {
          switch (operation.Inst)
          {
              case Instruction.LoadGlobal:
                  storageOp = new Operation(Instruction.LoadStorage, operation.Dest, storageSources);
                  break;
              case Instruction.StoreGlobal16:
                  storageOp = new Operation(Instruction.StoreStorage16, null, storageSources);
                  break;
              case Instruction.StoreGlobal8:
                  storageOp = new Operation(Instruction.StoreStorage8, null, storageSources);
                  break;
              default:
                  storageOp = new Operation(Instruction.StoreStorage, null, storageSources);
                  break;
          }
      }

      // Clear the sources of the operation that is about to be removed.
      for (int i = 0; i < operation.SourcesCount; i++)
      {
          operation.SetSource(i, null);
      }

      LinkedList<INode> list = node.List;
      LinkedListNode<INode> storageNode = list.AddBefore(node, storageOp);

      list.Remove(node);

      return storageNode;
  }
  /// <summary>
  /// Rewrites a texture sample operation that uses non-normalized coordinates, or that uses
  /// non-constant texture offsets on a host without support for them.
  /// </summary>
  /// <remarks>
  /// When neither applies, the node is returned untouched. Otherwise, the required coordinate
  /// adjustment code is inserted before the operation, and the operation itself is replaced
  /// with a new texture sample without the offset flags.
  /// </remarks>
  /// <param name="node">List node holding the texture sample operation</param>
  /// <param name="config">Shader configuration</param>
  /// <returns>The list node holding the resulting texture sample operation</returns>
  private static LinkedListNode<INode> RewriteTextureSample(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      bool FlagSet(TextureFlags flag) => (texOp.Flags & flag) != 0;
      bool TypeSet(SamplerType type) => (texOp.Type & type) != 0;

      // Inserts an operation right before the texture sample being rewritten.
      void Prepend(INode operation) => node.List.AddBefore(node, operation);

      bool hasOffset = FlagSet(TextureFlags.Offset);
      bool hasOffsets = FlagSet(TextureFlags.Offsets);

      bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();

      bool isBindless = FlagSet(TextureFlags.Bindless);

      bool isCoordNormalized = isBindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot);

      if (isCoordNormalized && !hasInvalidOffset)
      {
          return node;
      }

      bool isGather = FlagSet(TextureFlags.Gather);
      bool hasDerivatives = FlagSet(TextureFlags.Derivatives);
      bool intCoords = FlagSet(TextureFlags.IntCoords);
      bool hasLodBias = FlagSet(TextureFlags.LodBias);
      bool hasLodLevel = FlagSet(TextureFlags.LodLevel);

      bool isArray = TypeSet(SamplerType.Array);
      bool isIndexed = TypeSet(SamplerType.Indexed);
      bool isMultisample = TypeSet(SamplerType.Multisample);
      bool isShadow = TypeSet(SamplerType.Shadow);

      int coordsCount = texOp.Type.GetDimensions();

      // "Offsets" carries 4 offsets per coordinate, "Offset" a single one.
      int offsetsCount = hasOffsets ? coordsCount * 4 : (hasOffset ? coordsCount : 0);

      Operand[] offsets = new Operand[offsetsCount];
      Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount];

      // Bindless and indexed operations have one extra source before the coordinates.
      bool hasHandleSource = isBindless || isIndexed;

      int coordsIndex = hasHandleSource ? 1 : 0;

      // The Lod query takes that extra source (if any) and the coordinates, as they are on the original operation.
      Operand[] lodSources = new Operand[coordsIndex + coordsCount];

      for (int i = 0; i < lodSources.Length; i++)
      {
          lodSources[i] = texOp.GetSource(i);
      }

      // Amount of sources that come before the offsets, those are copied over unchanged.
      int copyCount = coordsIndex
          + coordsCount
          + (isArray ? 1 : 0)
          + (isShadow ? 1 : 0)
          + (hasDerivatives ? coordsCount * 2 : 0)
          + ((isMultisample || hasLodLevel) ? 1 : 0);

      int srcIndex = 0;
      int dstIndex = 0;

      while (srcIndex < copyCount)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      bool sawNonConstantOffset = false;

      for (int i = 0; i < offsetsCount; i++)
      {
          Operand current = texOp.GetSource(srcIndex++);

          sawNonConstantOffset |= current.Type != OperandType.Constant;

          offsets[i] = current;
      }

      // Offsets only need to be emulated if at least one of them is not a constant.
      hasInvalidOffset = hasInvalidOffset && sawNonConstantOffset;

      if (isCoordNormalized && !hasInvalidOffset)
      {
          return node;
      }

      if (hasLodBias)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      if (isGather && !isShadow)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      int componentIndex = texOp.Index;

      Operand Int(Operand value)
      {
          Operand converted = Local();

          Prepend(new Operation(Instruction.ConvertFP32ToS32, converted, value));

          return converted;
      }

      Operand Float(Operand value)
      {
          Operand converted = Local();

          Prepend(new Operation(Instruction.ConvertS32ToFP32, converted, value));

          return converted;
      }

      // Emits a TextureSize query for one component of the texture size, and returns its result.
      // The size is queried at level 0 when no LOD operand is given,
      // otherwise at the given LOD converted to integer.
      Operand QueryTextureSize(int component, Operand lodOperand)
      {
          Operand size = Local();

          Operand[] sizeSources = new Operand[coordsIndex + 1];

          if (hasHandleSource)
          {
              sizeSources[0] = sources[0];
          }

          sizeSources[coordsIndex] = lodOperand is null ? Const(0) : Int(lodOperand);

          Prepend(new TextureOperation(
              Instruction.TextureSize,
              texOp.Type,
              texOp.Format,
              texOp.Flags,
              texOp.CbufSlot,
              texOp.Handle,
              component,
              size,
              sizeSources));

          config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);

          return size;
      }

      // Emulate non-normalized coordinates by normalizing the coordinates on the shader.
      // Without normalization, the coordinates are expected to be in the [0, W or H] range,
      // and otherwise, they are expected to be in the [0, 1] range.
      // We normalize by dividing the coords by the texture size.
      if (!(isCoordNormalized || intCoords))
      {
          config.SetUsedFeature(FeatureFlags.IntegerSampling);

          // For cube textures, only the first 2 coordinates are divided.
          int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;

          for (int i = 0; i < normCoordsCount; i++)
          {
              Operand coordSize = QueryTextureSize(i, null);

              Operand coord = sources[coordsIndex + i];

              Operand coordNormalized = Local();

              Prepend(new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, coord, Float(coordSize)));

              sources[coordsIndex + i] = coordNormalized;
          }
      }

      // Technically, non-constant texture offsets are not allowed (according to the spec),
      // however some GPUs do support that.
      // For GPUs where it is not supported, we can replace the instruction with the following:
      // For texture*Offset, we replace it by texture*, and add the offset to the P coords.
      // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
      // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
      // For textureGatherOffset, we take advantage of the fact that the operation is already broken down
      // to read the 4 pixels separately, and just replace it with 4 textureGather with a different offset
      // for each pixel.
      if (hasInvalidOffset)
      {
          if (intCoords)
          {
              for (int i = 0; i < coordsCount; i++)
              {
                  Operand coord = sources[coordsIndex + i];

                  Operand coordPlusOffset = Local();

                  Prepend(new Operation(Instruction.Add, coordPlusOffset, coord, offsets[i]));

                  sources[coordsIndex + i] = coordPlusOffset;
              }
          }
          else
          {
              config.SetUsedFeature(FeatureFlags.IntegerSampling);

              Operand lod = Local();

              Prepend(new TextureOperation(
                  Instruction.Lod,
                  texOp.Type,
                  texOp.Format,
                  texOp.Flags,
                  texOp.CbufSlot,
                  texOp.Handle,
                  0,
                  lod,
                  lodSources));

              // With "Offsets", each operation picks its own set of offsets using its index.
              int firstOffset = hasOffsets ? texOp.Index * coordsCount : 0;

              for (int i = 0; i < coordsCount; i++)
              {
                  Operand coordSize = QueryTextureSize(i, lod);

                  Operand scaledOffset = Local();

                  Operand intOffset = offsets[i + firstOffset];

                  Prepend(new Operation(Instruction.FP32 | Instruction.Divide, scaledOffset, Float(intOffset), Float(coordSize)));

                  Operand coord = sources[coordsIndex + i];

                  Operand coordPlusOffset = Local();

                  Prepend(new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, coord, scaledOffset));

                  sources[coordsIndex + i] = coordPlusOffset;
              }
          }

          if (isGather && !isShadow)
          {
              // The gather component is the last source that was copied, and it becomes the new operation index.
              Operand gatherComponent = sources[dstIndex - 1];

              Debug.Assert(gatherComponent.Type == OperandType.Constant);

              componentIndex = gatherComponent.Value;
          }
      }

      TextureFlags flagsWithoutOffsets = texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets);

      TextureOperation newTexOp = new TextureOperation(
          Instruction.TextureSample,
          texOp.Type,
          texOp.Format,
          flagsWithoutOffsets,
          texOp.CbufSlot,
          texOp.Handle,
          componentIndex,
          texOp.Dest,
          sources);

      // Clear the sources of the operation that is about to be removed.
      for (int i = 0; i < texOp.SourcesCount; i++)
      {
          texOp.SetSource(i, null);
      }

      LinkedList<INode> list = node.List;
      LinkedListNode<INode> newNode = list.AddBefore(node, newTexOp);

      list.Remove(node);

      return newNode;
  }
  /// <summary>
  /// Inserts code that converts the result of a texture sample to float and scales it by the
  /// inverse of the maximum positive value of the format, when the texture has a SNORM format.
  /// </summary>
  /// <param name="node">List node holding the texture sample operation</param>
  /// <param name="config">Shader configuration</param>
  /// <returns>
  /// The list node holding the last inserted operation, or <paramref name="node"/> if nothing was inserted
  /// </returns>
  private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      // We can't query the format of a bindless texture,
      // because the handle is unknown, it can have any format.
      if (texOp.Flags.HasFlag(TextureFlags.Bindless))
      {
          return node;
      }

      int maxPositive = config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot) switch
      {
          TextureFormat.R8Snorm or
          TextureFormat.R8G8Snorm or
          TextureFormat.R8G8B8A8Snorm => sbyte.MaxValue,
          TextureFormat.R16Snorm or
          TextureFormat.R16G16Snorm or
          TextureFormat.R16G16B16A16Snorm => short.MaxValue,
          _ => 0
      };

      // The value being 0 means that the format is not a SNORM format,
      // so there's nothing to do here.
      if (maxPositive == 0)
      {
          return node;
      }

      // Do normalization. We assume SINT formats are being used
      // as replacement for SNORM (which is not supported).

      // The uses are captured before the conversion below is created,
      // so the conversion itself keeps reading the original value.
      INode[] uses = texOp.Dest.UseOps.ToArray();

      Operation convOp = new Operation(Instruction.ConvertS32ToFP32, Local(), texOp.Dest);
      Operation normOp = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), convOp.Dest, ConstF(1f / maxPositive));

      LinkedListNode<INode> convNode = node.List.AddAfter(node, convOp);
      LinkedListNode<INode> normNode = convNode.List.AddAfter(convNode, normOp);

      // Replace all uses of the texture pixel value with the normalized value.
      foreach (Operation useOp in uses.OfType<Operation>())
      {
          for (int i = 0; i < useOp.SourcesCount; i++)
          {
              if (useOp.GetSource(i) == texOp.Dest)
              {
                  useOp.SetSource(i, normOp.Dest);
              }
          }
      }

      return normNode;
  }
  406. }
  407. }