Rewriter.cs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Linq;
  5. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6. using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
  7. namespace Ryujinx.Graphics.Shader.Translation
  8. {
  9. static class Rewriter
  10. {
  /// <summary>
  /// Walks every operation of every basic block and rewrites the ones that need it:
  /// global memory accesses and texture sample operations.
  /// </summary>
  /// <param name="blocks">Basic blocks whose operation lists are rewritten in place</param>
  /// <param name="config">Shader configuration forwarded to the individual rewrite steps</param>
  public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
  {
      foreach (BasicBlock block in blocks)
      {
          LinkedListNode<INode> node = block.Operations.First;

          while (node != null)
          {
              if (node.Value is Operation operation)
              {
                  // Each rewrite step returns the node that took the place of the one
                  // it was given, so iteration always resumes from a live list node.
                  if (UsesGlobalMemory(operation.Inst))
                  {
                      node = RewriteGlobalAccess(node, config);
                  }

                  if (operation is TextureOperation { Inst: Instruction.TextureSample } texOp)
                  {
                      node = RewriteTextureSample(node, config);

                      if (texOp.Type == SamplerType.TextureBuffer)
                      {
                          node = InsertSnormNormalization(node, config);
                      }
                  }
              }

              node = node.Next;
          }
      }
  }
  /// <summary>
  /// Replaces a global memory operation (load, store or atomic) with the matching
  /// storage buffer operation.
  /// </summary>
  /// <remarks>
  /// Instructions are inserted before <paramref name="node"/> that compare the accessed
  /// address against each of the <c>StorageMaxCount</c> storage buffer descriptors read from
  /// constant buffer 0, select the slot whose range contains the address, and compute the
  /// word offset of the access inside that buffer.
  /// </remarks>
  /// <param name="node">List node holding the global memory operation</param>
  /// <param name="config">Shader configuration</param>
  /// <returns>The list node holding the storage operation that replaced the original one</returns>
  private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
  {
      Operation globalOp = (Operation)node.Value;

      bool isAtomic = globalOp.Inst.IsAtomic();
      bool isWrite = isAtomic || globalOp.Inst == Instruction.StoreGlobal;

      // Inserts "result = inst(args)" right before the access and hands back the result.
      Operand Emit(Instruction inst, params Operand[] args)
      {
          Operand result = Local();

          node.List.AddBefore(node, new Operation(inst, result, args));

          return result;
      }

      Operand addrLow = globalOp.GetSource(0);
      Operand addrHigh = globalOp.GetSource(1);

      // Both start at 0 and are overwritten by whichever slot contains the address.
      Operand selectedBaseLow = Const(0);
      Operand selectedSlot = Const(0);

      for (int slot = 0; slot < StorageMaxCount; slot++)
      {
          // The slot is only known at run time, so every slot is flagged as used.
          config.SetUsedStorageBuffer(slot, isWrite);

          int descriptorOffset = GetStorageCbOffset(config.Stage, slot);

          Operand baseLow = config.CreateCbuf(0, descriptorOffset);
          Operand baseHigh = config.CreateCbuf(0, descriptorOffset + 1);
          Operand size = config.CreateCbuf(0, descriptorOffset + 2);

          // Range test: the low word offset must be below the size, and the high word,
          // adjusted by the borrow of the low word subtraction, must match the base.
          Operand lowDelta = Emit(Instruction.Subtract, addrLow, baseLow);
          Operand borrow = Emit(Instruction.CompareLessU32, addrLow, baseLow);

          Operand lowInRange = Emit(Instruction.CompareLessU32, lowDelta, size);

          Operand highWithBorrow = Emit(Instruction.Add, addrHigh, borrow);

          Operand highInRange = Emit(Instruction.CompareEqual, highWithBorrow, baseHigh);

          Operand isInside = Emit(Instruction.BitwiseAnd, lowInRange, highInRange);

          selectedBaseLow = Emit(Instruction.ConditionalSelect, isInside, baseLow, selectedBaseLow);
          selectedSlot = Emit(Instruction.ConditionalSelect, isInside, Const(slot), selectedSlot);
      }

      // The base address is truncated with the host offset alignment mask before
      // being subtracted, then the byte offset is turned into a word offset.
      Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());

      Operand alignedBase = Emit(Instruction.BitwiseAnd, selectedBaseLow, alignMask);
      Operand byteOffset = Emit(Instruction.Subtract, addrLow, alignedBase);
      Operand wordOffset = Emit(Instruction.ShiftRightU32, byteOffset, Const(2));

      // The two address words become (slot, word offset); remaining sources are kept.
      Operand[] storageSources = new Operand[globalOp.SourcesCount];

      storageSources[0] = selectedSlot;
      storageSources[1] = wordOffset;

      for (int i = 2; i < globalOp.SourcesCount; i++)
      {
          storageSources[i] = globalOp.GetSource(i);
      }

      Operation storageOp = globalOp.Inst switch
      {
          _ when isAtomic => new Operation(
              (globalOp.Inst & ~Instruction.MrMask) | Instruction.MrStorage,
              globalOp.Dest,
              storageSources),
          Instruction.LoadGlobal => new Operation(Instruction.LoadStorage, globalOp.Dest, storageSources),
          _ => new Operation(Instruction.StoreStorage, null, storageSources)
      };

      // Clear the sources of the operation that is about to be removed.
      for (int i = 0; i < globalOp.SourcesCount; i++)
      {
          globalOp.SetSource(i, null);
      }

      LinkedListNode<INode> replacement = node.List.AddBefore(node, storageOp);

      replacement.List.Remove(node);

      return replacement;
  }
  /// <summary>
  /// Rewrites a texture sample operation that the host cannot execute as is.
  /// </summary>
  /// <remarks>
  /// Two independent rewrites are applied:
  /// rectangle textures get their coordinates normalized by the texture size, and
  /// non-constant texture offsets (when the host does not support them) are folded into
  /// the coordinates. Offsets are only removed from the operation when they are folded
  /// into the coordinates; if the operation is rewritten just for rectangle
  /// normalization, its offsets and offset flags are forwarded unchanged.
  /// </remarks>
  /// <param name="node">List node holding the texture sample operation</param>
  /// <param name="config">Shader configuration</param>
  /// <returns>
  /// <paramref name="node"/> itself when nothing had to change, otherwise the list node
  /// holding the new texture sample operation
  /// </returns>
  private static LinkedListNode<INode> RewriteTextureSample(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
      bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;

      bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();

      bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

      bool isRect = !isBindless && config.GpuAccessor.QueryIsTextureRectangle(texOp.Handle, texOp.CbufSlot);

      if (!(hasInvalidOffset || isRect))
      {
          return node;
      }

      bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
      bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
      bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
      bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
      bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;

      bool isArray = (texOp.Type & SamplerType.Array) != 0;
      bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
      bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
      bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;

      int coordsCount = texOp.Type.GetDimensions();

      int offsetsCount;

      if (hasOffsets)
      {
          offsetsCount = coordsCount * 4;
      }
      else if (hasOffset)
      {
          offsetsCount = coordsCount;
      }
      else
      {
          offsetsCount = 0;
      }

      // Number of sources that come before the offsets in the source list.
      int copyCount = 0;

      if (isBindless || isIndexed)
      {
          copyCount++;
      }

      // The LOD query only takes the (optional) handle/index plus the coordinates.
      Operand[] lodSources = new Operand[copyCount + coordsCount];

      for (int index = 0; index < lodSources.Length; index++)
      {
          lodSources[index] = texOp.GetSource(index);
      }

      copyCount += coordsCount;

      if (isArray)
      {
          copyCount++;
      }

      if (isShadow)
      {
          copyCount++;
      }

      if (hasDerivatives)
      {
          copyCount += coordsCount * 2;
      }

      if (isMultisample)
      {
          copyCount++;
      }
      else if (hasLodLevel)
      {
          copyCount++;
      }

      // Inspect the offsets first: whether they have to be stripped from the
      // operation is only known once we know if all of them are constant.
      Operand[] offsets = new Operand[offsetsCount];

      bool areAllOffsetsConstant = true;

      for (int index = 0; index < offsetsCount; index++)
      {
          Operand offset = texOp.GetSource(copyCount + index);

          areAllOffsetsConstant &= offset.Type == OperandType.Constant;

          offsets[index] = offset;
      }

      hasInvalidOffset &= !areAllOffsetsConstant;

      if (!(hasInvalidOffset || isRect))
      {
          return node;
      }

      // Offsets are removed only when they are emulated below. When the operation is
      // rewritten just for rectangle normalization they must stay on the operation,
      // otherwise a valid offset would be silently discarded.
      bool keepOffsets = !hasInvalidOffset;

      Operand[] sources = new Operand[texOp.SourcesCount - (keepOffsets ? 0 : offsetsCount)];

      int srcIndex = 0;
      int dstIndex = 0;

      for (int index = 0; index < copyCount; index++)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      srcIndex += offsetsCount;

      if (keepOffsets)
      {
          for (int index = 0; index < offsetsCount; index++)
          {
              sources[dstIndex++] = offsets[index];
          }
      }

      if (hasLodBias)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      if (isGather && !isShadow)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }

      int coordsIndex = isBindless || isIndexed ? 1 : 0;

      int componentIndex = texOp.Index;

      // Emits a float -> signed integer conversion before the sample.
      Operand Int(Operand value)
      {
          Operand res = Local();

          node.List.AddBefore(node, new Operation(Instruction.ConvertFPToS32, res, value));

          return res;
      }

      // Emits a signed integer -> float conversion before the sample.
      Operand Float(Operand value)
      {
          Operand res = Local();

          node.List.AddBefore(node, new Operation(Instruction.ConvertS32ToFP, res, value));

          return res;
      }

      // Emulate texture rectangle by normalizing the coordinates on the shader.
      // When sampler*Rect is used, the coords are expected to be in the [0, W or H] range,
      // and otherwise, they are expected to be in the [0, 1] range.
      // We normalize by dividing the coords by the texture size.
      if (isRect && !intCoords)
      {
          config.SetUsedFeature(FeatureFlags.IntegerSampling);

          for (int index = 0; index < coordsCount; index++)
          {
              Operand coordSize = Local();

              Operand[] texSizeSources;

              if (isBindless || isIndexed)
              {
                  texSizeSources = new Operand[] { sources[0], Const(0) };
              }
              else
              {
                  texSizeSources = new Operand[] { Const(0) };
              }

              node.List.AddBefore(node, new TextureOperation(
                  Instruction.TextureSize,
                  texOp.Type,
                  texOp.Format,
                  texOp.Flags,
                  texOp.CbufSlot,
                  texOp.Handle,
                  index,
                  coordSize,
                  texSizeSources));

              config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);

              Operand source = sources[coordsIndex + index];

              Operand coordNormalized = Local();

              node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, source, Float(coordSize)));

              sources[coordsIndex + index] = coordNormalized;
          }
      }

      // Technically, non-constant texture offsets are not allowed (according to the spec),
      // however some GPUs do support them.
      // For GPUs where they are not supported, we can replace the instruction with the following:
      // For texture*Offset, we replace it by texture*, and add the offset to the P coords.
      // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
      // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
      // For textureGatherOffset, we take advantage of the fact that the operation is already broken down
      // to read the 4 pixels separately, and just replace it with 4 textureGather with a different offset
      // for each pixel.
      if (hasInvalidOffset)
      {
          if (intCoords)
          {
              for (int index = 0; index < coordsCount; index++)
              {
                  Operand source = sources[coordsIndex + index];

                  Operand coordPlusOffset = Local();

                  node.List.AddBefore(node, new Operation(Instruction.Add, coordPlusOffset, source, offsets[index]));

                  sources[coordsIndex + index] = coordPlusOffset;
              }
          }
          else
          {
              config.SetUsedFeature(FeatureFlags.IntegerSampling);

              Operand lod = Local();

              node.List.AddBefore(node, new TextureOperation(
                  Instruction.Lod,
                  texOp.Type,
                  texOp.Format,
                  texOp.Flags,
                  texOp.CbufSlot,
                  texOp.Handle,
                  0,
                  lod,
                  lodSources));

              for (int index = 0; index < coordsCount; index++)
              {
                  Operand coordSize = Local();

                  Operand[] texSizeSources;

                  if (isBindless || isIndexed)
                  {
                      texSizeSources = new Operand[] { sources[0], Int(lod) };
                  }
                  else
                  {
                      texSizeSources = new Operand[] { Int(lod) };
                  }

                  node.List.AddBefore(node, new TextureOperation(
                      Instruction.TextureSize,
                      texOp.Type,
                      texOp.Format,
                      texOp.Flags,
                      texOp.CbufSlot,
                      texOp.Handle,
                      index,
                      coordSize,
                      texSizeSources));

                  config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);

                  Operand offset = Local();

                  // With per-pixel offsets, the operation index selects which set of offsets applies.
                  Operand intOffset = offsets[index + (hasOffsets ? texOp.Index * coordsCount : 0)];

                  node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(coordSize)));

                  Operand source = sources[coordsIndex + index];

                  Operand coordPlusOffset = Local();

                  node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));

                  sources[coordsIndex + index] = coordPlusOffset;
              }
          }

          if (isGather && !isShadow)
          {
              // The gather component is the last source copied above.
              Operand gatherComponent = sources[dstIndex - 1];

              Debug.Assert(gatherComponent.Type == OperandType.Constant);

              componentIndex = gatherComponent.Value;
          }
      }

      TextureFlags newFlags = keepOffsets
          ? texOp.Flags
          : texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets);

      TextureOperation newTexOp = new TextureOperation(
          Instruction.TextureSample,
          texOp.Type,
          texOp.Format,
          newFlags,
          texOp.CbufSlot,
          texOp.Handle,
          componentIndex,
          texOp.Dest,
          sources);

      // Clear the sources of the operation that is about to be removed.
      for (int index = 0; index < texOp.SourcesCount; index++)
      {
          texOp.SetSource(index, null);
      }

      LinkedListNode<INode> oldNode = node;

      node = node.List.AddBefore(node, newTexOp);

      node.List.Remove(oldNode);

      return node;
  }
  /// <summary>
  /// Appends a conversion to normalized float after a texture sample that reads from
  /// a signed normalized (SNORM) format, and redirects the existing users of the
  /// sampled value to the normalized result.
  /// </summary>
  /// <param name="node">List node holding the texture sample operation</param>
  /// <param name="config">Shader configuration, used to query the texture format</param>
  /// <returns>
  /// <paramref name="node"/> itself when no normalization was inserted, otherwise the
  /// list node of the last inserted operation
  /// </returns>
  private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      // We can't query the format of a bindless texture,
      // because the handle is unknown, it can have any format.
      if (texOp.Flags.HasFlag(TextureFlags.Bindless))
      {
          return node;
      }

      TextureFormat format = config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot);

      // Largest positive value of the component type; 0 marks "not a SNORM format".
      int maxPositive = format switch
      {
          TextureFormat.R8Snorm or
          TextureFormat.R8G8Snorm or
          TextureFormat.R8G8B8A8Snorm => sbyte.MaxValue,
          TextureFormat.R16Snorm or
          TextureFormat.R16G16Snorm or
          TextureFormat.R16G16B16A16Snorm => short.MaxValue,
          _ => 0
      };

      if (maxPositive == 0)
      {
          return node;
      }

      // Do normalization. We assume SINT formats are being used
      // as replacement for SNORM (which is not supported).

      // Snapshot the current users before creating the conversion below,
      // since the conversion itself reads the sampled value.
      INode[] previousUsers = texOp.Dest.UseOps.ToArray();

      Operation toFloat = new Operation(Instruction.ConvertS32ToFP, Local(), texOp.Dest);
      Operation scale = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), toFloat.Dest, ConstF(1f / maxPositive));

      LinkedListNode<INode> toFloatNode = node.List.AddAfter(node, toFloat);
      LinkedListNode<INode> scaleNode = toFloatNode.List.AddAfter(toFloatNode, scale);

      // Replace all uses of the texture pixel value with the normalized value.
      foreach (Operation user in previousUsers.OfType<Operation>())
      {
          for (int srcIndex = 0; srcIndex < user.SourcesCount; srcIndex++)
          {
              if (user.GetSource(srcIndex) == texOp.Dest)
              {
                  user.SetSource(srcIndex, scale.Dest);
              }
          }
      }

      return scaleNode;
  }
  392. }
  393. }