Rewriter.cs 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988
  1. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  2. using Ryujinx.Graphics.Shader.StructuredIr;
  3. using Ryujinx.Graphics.Shader.Translation.Optimizations;
  4. using System.Collections.Generic;
  5. using System.Diagnostics;
  6. using System.Linq;
  7. using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  8. namespace Ryujinx.Graphics.Shader.Translation
  9. {
  10. static class Rewriter
  11. {
  /// <summary>
  /// Runs the rewrite pass over every operation of every basic block, applying the rewrites
  /// selected by the shader stage, the translation options and the capabilities reported by the GPU accessor.
  /// </summary>
  /// <param name="hfm">Helper function manager, forwarded to the rewrites that emit helper function calls</param>
  /// <param name="blocks">Basic blocks whose operation lists are rewritten in place</param>
  /// <param name="config">Shader configuration, queried for the stage, options and host capabilities</param>
  public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
  {
      // All capability queries are done once, before any operation is visited.
      bool vertexStage = config.Stage == ShaderStage.Vertex;
      bool reducedPrecisionFragment = config.Stage == ShaderStage.Fragment && config.GpuAccessor.QueryHostReducedPrecision();
      bool cbufDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters();
      bool vectorIndexingBug = config.GpuAccessor.QueryHostHasVectorIndexingBug();
      bool snormBufferFormatSupported = config.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat();

      foreach (BasicBlock block in blocks)
      {
          LinkedListNode<INode> node = block.Operations.First;

          while (node != null)
          {
              // Nodes that are not operations are left untouched.
              if (node.Value is Operation operation)
              {
                  if (vertexStage)
                  {
                      // Either replace the constant buffer reads with draw parameter loads,
                      // or just detect that the shader reads them.
                      bool usesDrawParameters = cbufDrawParameters
                          ? ReplaceConstantBufferWithDrawParameters(node, operation)
                          : HasConstantBufferDrawParameters(operation);

                      if (usesDrawParameters)
                      {
                          config.SetUsedFeature(FeatureFlags.DrawParameters);
                      }
                  }

                  if (reducedPrecisionFragment)
                  {
                      EnableForcePreciseIfNeeded(operation);
                  }

                  if (vectorIndexingBug)
                  {
                      InsertVectorComponentSelect(node, config);
                  }

                  if (operation is not TextureOperation texOp)
                  {
                      node = InsertSharedStoreSmallInt(hfm, node);

                      if (config.Options.TargetLanguage != TargetLanguage.Spirv)
                      {
                          node = InsertSharedAtomicSigned(hfm, node);
                      }
                  }
                  else
                  {
                      node = InsertTexelFetchScale(hfm, node, config);
                      node = InsertTextureSizeUnscale(hfm, node, config);

                      if (texOp.Inst == Instruction.TextureSample)
                      {
                          node = InsertCoordNormalization(node, config);
                          node = InsertCoordGatherBias(node, config);
                          node = InsertConstOffsets(node, config);

                          if (texOp.Type == SamplerType.TextureBuffer && !snormBufferFormatSupported)
                          {
                              node = InsertSnormNormalization(node, config);
                          }
                      }
                  }
              }

              // The rewrites return the node to continue from, so advance from whatever they left us with.
              node = node.Next;
          }
      }
  }
  /// <summary>
  /// Marks the addition feeding the divisor of a "1.0 / (x + constant)" pattern as precise.
  /// </summary>
  /// <remarks>
  /// There are some cases where a small bias is added to values to prevent division by zero.
  /// When operating with reduced precision, it is possible for this bias to get rounded to 0
  /// and cause a division by zero. To prevent that, those operations are forced to be precise
  /// even if the host wants imprecise operations for performance.
  /// </remarks>
  /// <param name="operation">Operation to inspect</param>
  private static void EnableForcePreciseIfNeeded(Operation operation)
  {
      if (operation.Inst != (Instruction.FP32 | Instruction.Divide))
      {
          return;
      }

      // The dividend must be the constant 1.0.
      if (operation.GetSource(0).Type != OperandType.Constant)
      {
          return;
      }

      if (operation.GetSource(0).AsFloat() != 1f)
      {
          return;
      }

      // The divisor must be produced by a FP32 addition...
      if (operation.GetSource(1).AsgOp is not Operation addOp)
      {
          return;
      }

      if (addOp.Inst != (Instruction.FP32 | Instruction.Add))
      {
          return;
      }

      // ...whose second source is a constant (the bias).
      if (addOp.GetSource(1).Type == OperandType.Constant)
      {
          addOp.ForcePrecise = true;
      }
  }
  /// <summary>
  /// Replaces a constant buffer load that uses a non-constant vector element index with one load
  /// per vector component followed by a chain of compare and select operations that pick the
  /// component matching the element index. The original operation is turned into a copy of the selected value.
  /// </summary>
  /// <param name="node">Node of the operation to inspect; new operations are inserted before it</param>
  /// <param name="config">Shader configuration, used to look up the constant buffer field type</param>
  private static void InsertVectorComponentSelect(LinkedListNode<INode> node, ShaderConfig config)
  {
      Operation operation = (Operation)node.Value;

      bool isCandidateLoad =
          operation.Inst == Instruction.Load &&
          operation.StorageKind == StorageKind.ConstantBuffer &&
          operation.SourcesCount >= 3;

      if (!isCandidateLoad)
      {
          return;
      }

      int lastSourceIndex = operation.SourcesCount - 1;

      Operand bindingIndex = operation.GetSource(0);
      Operand fieldIndex = operation.GetSource(1);
      Operand elemIndex = operation.GetSource(lastSourceIndex);

      // Only a constant binding and field with a dynamic element index are rewritten.
      bool hasDynamicElemIndex =
          bindingIndex.Type == OperandType.Constant &&
          fieldIndex.Type == OperandType.Constant &&
          elemIndex.Type != OperandType.Constant;

      if (!hasDynamicElemIndex)
      {
          return;
      }

      BufferDefinition buffer = config.Properties.ConstantBuffers[bindingIndex.Value];
      StructureField field = buffer.Type.Fields[fieldIndex.Value];

      int elemCount;

      switch (field.Type & AggregateType.ElementCountMask)
      {
          case AggregateType.Vector2:
              elemCount = 2;
              break;
          case AggregateType.Vector3:
              elemCount = 3;
              break;
          case AggregateType.Vector4:
              elemCount = 4;
              break;
          default:
              // Anything else counts as a single element, there is nothing to select from.
              return;
      }

      Operand selected = null;

      for (int component = 0; component < elemCount; component++)
      {
          Operand loaded = Local();

          // Same sources as the original load, except that the element index becomes a constant.
          Operand[] loadSources = new Operand[operation.SourcesCount];

          for (int copyIndex = 0; copyIndex < lastSourceIndex; copyIndex++)
          {
              loadSources[copyIndex] = operation.GetSource(copyIndex);
          }

          loadSources[lastSourceIndex] = Const(component);

          node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.ConstantBuffer, loaded, loadSources));

          if (component == 0)
          {
              // The first component is the fallback value of the selection chain.
              selected = loaded;
              continue;
          }

          Operand matchesIndex = Local();
          Operand chosen = Local();

          node.List.AddBefore(node, new Operation(Instruction.CompareEqual, matchesIndex, new Operand[] { elemIndex, Const(component) }));
          node.List.AddBefore(node, new Operation(Instruction.ConditionalSelect, chosen, new Operand[] { matchesIndex, loaded, selected }));

          selected = chosen;
      }

      operation.TurnIntoCopy(selected);
  }
  /// <summary>
  /// Replaces a store with 8-bit or 16-bit shared memory storage kind by a call to the matching
  /// shared store helper function.
  /// </summary>
  /// <param name="hfm">Helper function manager used to get or create the helper function</param>
  /// <param name="node">Node of the operation to inspect</param>
  /// <returns>The node of the inserted call, or <paramref name="node"/> if nothing was replaced</returns>
  private static LinkedListNode<INode> InsertSharedStoreSmallInt(HelperFunctionManager hfm, LinkedListNode<INode> node)
  {
      Operation operation = (Operation)node.Value;

      HelperFunctionName helper;

      switch (operation.StorageKind)
      {
          case StorageKind.SharedMemory8:
              helper = HelperFunctionName.SharedStore8;
              break;
          case StorageKind.SharedMemory16:
              helper = HelperFunctionName.SharedStore16;
              break;
          default:
              return node;
      }

      if (operation.Inst != Instruction.Store)
      {
          return node;
      }

      Operand memoryId = operation.GetSource(0);
      Operand byteOffset = operation.GetSource(1);
      Operand value = operation.GetSource(2);

      Debug.Assert(memoryId.Type == OperandType.Constant);

      int functionId = hfm.GetOrCreateFunctionId(helper, memoryId.Value);

      // The call has no destination; the helper receives the byte offset and the value to store.
      Operation call = new Operation(Instruction.Call, 0, (Operand)null, new Operand[] { Const(functionId), byteOffset, value });

      LinkedListNode<INode> callNode = node.List.AddBefore(node, call);

      Utils.DeleteNode(node, operation);

      return callNode;
  }
  /// <summary>
  /// Replaces a signed 32-bit atomic minimum or maximum on shared memory by a call to the matching
  /// helper function. The call writes to the destination of the original operation.
  /// </summary>
  /// <param name="hfm">Helper function manager used to get or create the helper function</param>
  /// <param name="node">Node of the operation to inspect</param>
  /// <returns>The node of the inserted call, or <paramref name="node"/> if nothing was replaced</returns>
  private static LinkedListNode<INode> InsertSharedAtomicSigned(HelperFunctionManager hfm, LinkedListNode<INode> node)
  {
      Operation operation = (Operation)node.Value;

      HelperFunctionName helper;

      switch (operation.Inst)
      {
          case Instruction.AtomicMaxS32:
              helper = HelperFunctionName.SharedAtomicMaxS32;
              break;
          case Instruction.AtomicMinS32:
              helper = HelperFunctionName.SharedAtomicMinS32;
              break;
          default:
              return node;
      }

      if (operation.StorageKind != StorageKind.SharedMemory)
      {
          return node;
      }

      Operand result = operation.Dest;
      Operand memoryId = operation.GetSource(0);
      Operand byteOffset = operation.GetSource(1);
      Operand value = operation.GetSource(2);

      Debug.Assert(memoryId.Type == OperandType.Constant);

      int functionId = hfm.GetOrCreateFunctionId(helper, memoryId.Value);

      Operation call = new Operation(Instruction.Call, 0, result, new Operand[] { Const(functionId), byteOffset, value });

      LinkedListNode<INode> callNode = node.List.AddBefore(node, call);

      Utils.DeleteNode(node, operation);

      return callNode;
  }
  /// <summary>
  /// Routes the coordinates of integer-coordinate texture samples and scalable image instructions
  /// through the texel fetch scale helper function, when the stage and sampler type support render scale.
  /// Bindless and indexed accesses are left untouched.
  /// </summary>
  /// <param name="hfm">Helper function manager used to get or create the scale helper function</param>
  /// <param name="node">Node of the texture operation; the helper calls are inserted before it</param>
  /// <param name="config">Shader configuration, used for the stage and the descriptor indices</param>
  /// <returns>The same <paramref name="node"/> that was passed in</returns>
  private static LinkedListNode<INode> InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
      bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
      bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;

      int coordsCount = texOp.Type.GetDimensions();
      int coordsIndex = isBindless || isIndexed ? 1 : 0;

      bool isImage = IsImageInstructionWithScale(texOp.Inst);

      // Either a scalable image instruction, or a texture sample using integer coordinates.
      bool isScalableAccess = isImage || (texOp.Inst == Instruction.TextureSample && intCoords);

      if (!isScalableAccess ||
          isBindless ||
          isIndexed ||
          !config.Stage.SupportsRenderScale() ||
          !TypeSupportsScale(texOp.Type))
      {
          return node;
      }

      int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.TexelFetchScale);

      // Image descriptor indices are placed after all the texture descriptors.
      int samplerIndex;

      if (isImage)
      {
          samplerIndex = config.GetTextureDescriptors().Length + config.FindImageDescriptorIndex(texOp);
      }
      else
      {
          samplerIndex = config.FindTextureDescriptorIndex(texOp);
      }

      // On the fragment stage the helper additionally receives the coordinate component index.
      bool passComponentIndex = config.Stage == ShaderStage.Fragment;

      for (int index = 0; index < coordsCount; index++)
      {
          int sourceIndex = coordsIndex + index;

          Operand scaledCoord = Local();
          Operand coord = texOp.GetSource(sourceIndex);

          Operand[] callArgs = passComponentIndex
              ? new Operand[] { Const(functionId), coord, Const(samplerIndex), Const(index) }
              : new Operand[] { Const(functionId), coord, Const(samplerIndex) };

          node.List.AddBefore(node, new Operation(Instruction.Call, 0, scaledCoord, callArgs));

          texOp.SetSource(sourceIndex, scaledCoord);
      }

      return node;
  }
  /// <summary>
  /// Routes the result of a texture size query through the texture size unscale helper function,
  /// when the queried component index is below 2, the access is neither bindless nor indexed,
  /// and the stage and sampler type support render scale.
  /// </summary>
  /// <param name="hfm">Helper function manager used to get or create the unscale helper function</param>
  /// <param name="node">Node of the texture operation; the helper calls are inserted after it</param>
  /// <param name="config">Shader configuration, used for the stage and the texture descriptor index</param>
  /// <returns>The same <paramref name="node"/> that was passed in</returns>
  private static LinkedListNode<INode> InsertTextureSizeUnscale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
      bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;

      if (texOp.Inst == Instruction.TextureSize &&
          texOp.Index < 2 &&
          !isBindless &&
          !isIndexed &&
          config.Stage.SupportsRenderScale() &&
          TypeSupportsScale(texOp.Type))
      {
          int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.TextureSizeUnscale);
          int samplerIndex = config.FindTextureDescriptorIndex(texOp, ignoreType: true);

          // Calls are added right after the node, so going backwards keeps them in destination order.
          for (int index = texOp.DestsCount - 1; index >= 0; index--)
          {
              Operand dest = texOp.GetDest(index);

              Operand unscaledSize = Local();

              // Take a snapshot of the current uses before replacing them, the same way
              // InsertSnormNormalization does. Replacing a source presumably updates the use
              // list of the operands involved (TODO confirm), and a collection should not be
              // modified while it is being enumerated.
              INode[] uses = dest.UseOps.ToArray();

              // Replace all uses with the unscaled size value.
              // This must be done before the call is added, since it also is a use of the original size.
              foreach (INode useOp in uses)
              {
                  for (int srcIndex = 0; srcIndex < useOp.SourcesCount; srcIndex++)
                  {
                      if (useOp.GetSource(srcIndex) == dest)
                      {
                          useOp.SetSource(srcIndex, unscaledSize);
                      }
                  }
              }

              Operand[] callArgs = new Operand[] { Const(functionId), dest, Const(samplerIndex) };

              node.List.AddAfter(node, new Operation(Instruction.Call, 0, unscaledSize, callArgs));
          }
      }

      return node;
  }
  /// <summary>
  /// Checks if the instruction is an image instruction whose coordinates are scaled.
  /// Scaling images that are modified is currently not supported, so only the load instruction qualifies.
  /// </summary>
  /// <param name="inst">Instruction to check</param>
  /// <returns>True if the instruction is an image load, false otherwise</returns>
  private static bool IsImageInstructionWithScale(Instruction inst) => inst == Instruction.ImageLoad;
  /// <summary>
  /// Checks if the sampler type, once masked with <see cref="SamplerType.Mask"/>, is a 2D texture.
  /// </summary>
  /// <param name="type">Sampler type to check</param>
  /// <returns>True if the masked type is <see cref="SamplerType.Texture2D"/>, false otherwise</returns>
  private static bool TypeSupportsScale(SamplerType type) => SamplerType.Texture2D == (type & SamplerType.Mask);
  /// <summary>
  /// Emulates non-normalized texture coordinates by normalizing the coordinates on the shader.
  /// </summary>
  /// <remarks>
  /// Without normalization, the coordinates are expected to be in the [0, W or H] range,
  /// and otherwise, they are expected to be in the [0, 1] range.
  /// The coordinates are normalized by dividing them by the texture size.
  /// </remarks>
  /// <param name="node">Node of the texture operation; new operations are inserted before it</param>
  /// <param name="config">Shader configuration, used to query whether the coordinates are normalized</param>
  /// <returns>The same <paramref name="node"/> that was passed in</returns>
  private static LinkedListNode<INode> InsertCoordNormalization(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
      bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;

      // Bindless textures are considered normalized without querying the GPU accessor.
      bool isCoordNormalized = isBindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot);

      if (isCoordNormalized || intCoords)
      {
          return node;
      }

      bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
      bool hasHandleSource = isBindless || isIndexed;

      int coordsCount = texOp.Type.GetDimensions();
      int coordsIndex = hasHandleSource ? 1 : 0;

      config.SetUsedFeature(FeatureFlags.IntegerSampling);

      // For cube textures only the first 2 coordinates are normalized.
      bool isCube = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube;
      int normCoordsCount = isCube ? 2 : coordsCount;

      for (int index = 0; index < normCoordsCount; index++)
      {
          Operand coordSize = Local();

          // Query the size at level 0, passing the first source along for bindless/indexed accesses.
          Operand[] texSizeSources = hasHandleSource
              ? new Operand[] { texOp.GetSource(0), Const(0) }
              : new Operand[] { Const(0) };

          TextureOperation sizeOp = new TextureOperation(
              Instruction.TextureSize,
              texOp.Type,
              texOp.Format,
              texOp.Flags,
              texOp.CbufSlot,
              texOp.Handle,
              index,
              new[] { coordSize },
              texSizeSources);

          node.List.AddBefore(node, sizeOp);

          config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);

          int sourceIndex = coordsIndex + index;

          Operand source = texOp.GetSource(sourceIndex);
          Operand coordNormalized = Local();

          // The integer size is converted to float before the division.
          Operand sizeAsFloat = GenerateI2f(node, coordSize);

          node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, source, sizeAsFloat));

          texOp.SetSource(sourceIndex, coordNormalized);
      }

      return node;
  }
  /// <summary>
  /// Adds a small bias to the coordinates of texture gather operations, when the host requires it.
  /// </summary>
  /// <remarks>
  /// The gather behavior when the coordinate sits right in the middle of two texels is not well defined.
  /// To ensure the correct texel is sampled, a small bias value is added to the coordinate.
  /// The bias is computed as 1 / (textureSize * 2^(precision + 1)), where precision is the value
  /// reported by the GPU accessor. Nothing is done when the reported precision is 0.
  /// </remarks>
  /// <param name="node">Node of the texture operation; new operations are inserted before it</param>
  /// <param name="config">Shader configuration, used to query the host gather bias precision</param>
  /// <returns>The same <paramref name="node"/> that was passed in</returns>
  private static LinkedListNode<INode> InsertCoordGatherBias(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
      bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;

      int gatherBiasPrecision = config.GpuAccessor.QueryHostGatherBiasPrecision();

      if (!isGather || gatherBiasPrecision == 0)
      {
          return node;
      }

      bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;

      int coordsCount = texOp.Type.GetDimensions();
      int coordsIndex = isBindless || isIndexed ? 1 : 0;

      config.SetUsedFeature(FeatureFlags.IntegerSampling);

      // For cube textures only the first 2 coordinates are biased.
      int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;

      for (int index = 0; index < normCoordsCount; index++)
      {
          Operand coordSize = Local();
          Operand scaledSize = Local();
          Operand bias = Local();

          Operand[] texSizeSources;

          if (isBindless || isIndexed)
          {
              // The first source is passed along to the size query.
              texSizeSources = new Operand[] { texOp.GetSource(0), Const(0) };
          }
          else
          {
              texSizeSources = new Operand[] { Const(0) };
          }

          node.List.AddBefore(node, new TextureOperation(
              Instruction.TextureSize,
              texOp.Type,
              texOp.Format,
              texOp.Flags,
              texOp.CbufSlot,
              texOp.Handle,
              index,
              new[] { coordSize },
              texSizeSources));

          config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);

          // scaledSize = (float)size * 2^(precision + 1)
          node.List.AddBefore(node, new Operation(
              Instruction.FP32 | Instruction.Multiply,
              scaledSize,
              GenerateI2f(node, coordSize),
              ConstF((float)(1 << (gatherBiasPrecision + 1)))));

          // bias = 1 / scaledSize
          node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, bias, ConstF(1f), scaledSize));

          Operand source = texOp.GetSource(coordsIndex + index);

          Operand coordBiased = Local();

          node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordBiased, source, bias));

          texOp.SetSource(coordsIndex + index, coordBiased);
      }

      return node;
  }
  /// <summary>
  /// Replaces a texture sample that uses non-constant offsets with equivalent operations that add
  /// the offsets to the coordinates, when the host does not support non-constant texture offsets.
  /// </summary>
  /// <param name="node">Node of the texture operation to inspect</param>
  /// <param name="config">Shader configuration, used to query host support and to flag used features</param>
  /// <returns>
  /// The node of the last texture operation inserted as replacement,
  /// or <paramref name="node"/> if the operation was left untouched
  /// </returns>
  private static LinkedListNode<INode> InsertConstOffsets(LinkedListNode<INode> node, ShaderConfig config)
  {
      // Non-constant texture offsets are not allowed (according to the spec),
      // however some GPUs do support that.
      // For GPUs where it is not supported, we can replace the instruction with the following:
      // For texture*Offset, we replace it by texture*, and add the offset to the P coords.
      // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
      // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
      // For textureGatherOffset, we split the operation into up to 4 operations, one for each component
      // that is accessed, where each textureGather operation has a different offset for each pixel.
      TextureOperation texOp = (TextureOperation)node.Value;
      bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
      bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
      bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();
      bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
      // Nothing to do if there are no offsets, or if the host supports non-constant offsets.
      if (!hasInvalidOffset)
      {
          return node;
      }
      bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
      bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
      bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
      bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
      bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
      bool isArray = (texOp.Type & SamplerType.Array) != 0;
      bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
      bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
      bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;
      int coordsCount = texOp.Type.GetDimensions();
      // With the "offsets" flag there are 4 offset vectors, otherwise a single one.
      int offsetsCount;
      if (hasOffsets)
      {
          offsetsCount = coordsCount * 4;
      }
      else if (hasOffset)
      {
          offsetsCount = coordsCount;
      }
      else
      {
          offsetsCount = 0;
      }
      Operand[] offsets = new Operand[offsetsCount];
      // The replacement operation takes all the original sources except the offsets.
      Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount];
      // copyCount counts how many leading sources come before the offsets on the source list.
      int copyCount = 0;
      if (isBindless || isIndexed)
      {
          copyCount++;
      }
      // The LOD query only takes the leading bindless/indexed source (if any) and the coordinates.
      Operand[] lodSources = new Operand[copyCount + coordsCount];
      for (int index = 0; index < lodSources.Length; index++)
      {
          lodSources[index] = texOp.GetSource(index);
      }
      copyCount += coordsCount;
      if (isArray)
      {
          copyCount++;
      }
      if (isShadow)
      {
          copyCount++;
      }
      if (hasDerivatives)
      {
          copyCount += coordsCount * 2;
      }
      if (isMultisample)
      {
          copyCount++;
      }
      else if (hasLodLevel)
      {
          copyCount++;
      }
      int srcIndex = 0;
      int dstIndex = 0;
      // Copy everything that comes before the offsets.
      for (int index = 0; index < copyCount; index++)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }
      bool areAllOffsetsConstant = true;
      // Extract the offsets, and check if any of them is not a constant.
      for (int index = 0; index < offsetsCount; index++)
      {
          Operand offset = texOp.GetSource(srcIndex++);
          areAllOffsetsConstant &= offset.Type == OperandType.Constant;
          offsets[index] = offset;
      }
      hasInvalidOffset &= !areAllOffsetsConstant;
      // If all the offsets are constant, the operation can be left as is.
      if (!hasInvalidOffset)
      {
          return node;
      }
      // Copy the sources that come after the offsets.
      if (hasLodBias)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }
      if (isGather && !isShadow)
      {
          sources[dstIndex++] = texOp.GetSource(srcIndex++);
      }
      int coordsIndex = isBindless || isIndexed ? 1 : 0;
      int componentIndex = texOp.Index;
      Operand[] dests = new Operand[texOp.DestsCount];
      for (int i = 0; i < texOp.DestsCount; i++)
      {
          dests[i] = texOp.GetDest(i);
      }
      Operand bindlessHandle = isBindless || isIndexed ? sources[0] : null;
      // Remember the original node, it is removed from the list once the replacement is in place.
      LinkedListNode<INode> oldNode = node;
      if (isGather && !isShadow)
      {
          // Non-shadow gather: one texture operation is emitted for each component set on the index mask.
          config.SetUsedFeature(FeatureFlags.IntegerSampling);
          Operand[] newSources = new Operand[sources.Length];
          sources.CopyTo(newSources, 0);
          Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount);
          int destIndex = 0;
          for (int compIndex = 0; compIndex < 4; compIndex++)
          {
              if (((texOp.Index >> compIndex) & 1) == 0)
              {
                  continue;
              }
              for (int index = 0; index < coordsCount; index++)
              {
                  config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
                  Operand offset = Local();
                  // With the "offsets" flag, each component has its own offset vector.
                  Operand intOffset = offsets[index + (hasOffsets ? compIndex * coordsCount : 0)];
                  // offset = (float)intOffset / (float)textureSize
                  node.List.AddBefore(node, new Operation(
                      Instruction.FP32 | Instruction.Divide,
                      offset,
                      GenerateI2f(node, intOffset),
                      GenerateI2f(node, texSizes[index])));
                  Operand source = sources[coordsIndex + index];
                  Operand coordPlusOffset = Local();
                  node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
                  newSources[coordsIndex + index] = coordPlusOffset;
              }
              TextureOperation newTexOp = new(
                  Instruction.TextureSample,
                  texOp.Type,
                  texOp.Format,
                  texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
                  texOp.CbufSlot,
                  texOp.Handle,
                  1,
                  new[] { dests[destIndex++] },
                  newSources);
              node = node.List.AddBefore(node, newTexOp);
          }
      }
      else
      {
          if (intCoords)
          {
              // Integer coordinates: the offset is added to the coordinates directly.
              for (int index = 0; index < coordsCount; index++)
              {
                  Operand source = sources[coordsIndex + index];
                  Operand coordPlusOffset = Local();
                  node.List.AddBefore(node, new Operation(Instruction.Add, coordPlusOffset, source, offsets[index]));
                  sources[coordsIndex + index] = coordPlusOffset;
              }
          }
          else
          {
              // Float coordinates: the offset is divided by the texture size before being added.
              config.SetUsedFeature(FeatureFlags.IntegerSampling);
              Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount);
              for (int index = 0; index < coordsCount; index++)
              {
                  config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
                  Operand offset = Local();
                  Operand intOffset = offsets[index];
                  node.List.AddBefore(node, new Operation(
                      Instruction.FP32 | Instruction.Divide,
                      offset,
                      GenerateI2f(node, intOffset),
                      GenerateI2f(node, texSizes[index])));
                  Operand source = sources[coordsIndex + index];
                  Operand coordPlusOffset = Local();
                  node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
                  sources[coordsIndex + index] = coordPlusOffset;
              }
          }
          TextureOperation newTexOp = new(
              Instruction.TextureSample,
              texOp.Type,
              texOp.Format,
              texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
              texOp.CbufSlot,
              texOp.Handle,
              componentIndex,
              dests,
              sources);
          node = node.List.AddBefore(node, newTexOp);
      }
      // Remove the original operation from the list, and clear all of its sources.
      node.List.Remove(oldNode);
      for (int index = 0; index < texOp.SourcesCount; index++)
      {
          texOp.SetSource(index, null);
      }
      return node;
  }
  /// <summary>
  /// Inserts a LOD query followed by one texture size query per coordinate before the given node.
  /// Each size query uses the queried LOD converted to integer as its level.
  /// </summary>
  /// <param name="node">Node before which the new operations are inserted</param>
  /// <param name="texOp">Texture operation providing the type, format, flags, slot and handle of the queries</param>
  /// <param name="lodSources">Sources of the LOD query</param>
  /// <param name="bindlessHandle">Operand passed as first source of each size query, or null for none</param>
  /// <param name="coordsCount">Number of coordinates to query the size for</param>
  /// <returns>One operand per coordinate, holding the result of the matching size query</returns>
  private static Operand[] InsertTextureLod(
      LinkedListNode<INode> node,
      TextureOperation texOp,
      Operand[] lodSources,
      Operand bindlessHandle,
      int coordsCount)
  {
      Operand[] sizes = new Operand[coordsCount];

      Operand lod = Local();

      TextureOperation lodOp = new TextureOperation(
          Instruction.Lod,
          texOp.Type,
          texOp.Format,
          texOp.Flags,
          texOp.CbufSlot,
          texOp.Handle,
          0,
          new[] { lod },
          lodSources);

      node.List.AddBefore(node, lodOp);

      for (int coord = 0; coord < coordsCount; coord++)
      {
          sizes[coord] = Local();

          // A separate float to integer conversion is inserted for every size query.
          Operand lodAsInt = GenerateF2i(node, lod);

          Operand[] sizeSources = bindlessHandle != null
              ? new Operand[] { bindlessHandle, lodAsInt }
              : new Operand[] { lodAsInt };

          TextureOperation sizeOp = new TextureOperation(
              Instruction.TextureSize,
              texOp.Type,
              texOp.Format,
              texOp.Flags,
              texOp.CbufSlot,
              texOp.Handle,
              coord,
              new[] { sizes[coord] },
              sizeSources);

          node.List.AddBefore(node, sizeOp);
      }

      return sizes;
  }
  /// <summary>
  /// Converts the integer results of a texture operation to normalized floats, for textures whose
  /// format is one of the 8-bit or 16-bit SNORM formats. All uses of the original results are
  /// replaced with the normalized values.
  /// </summary>
  /// <param name="node">Node of the texture operation; new operations are inserted after it</param>
  /// <param name="config">Shader configuration, used to query the texture format</param>
  /// <returns>The node of the last inserted operation, or <paramref name="node"/> if nothing was inserted</returns>
  private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
  {
      TextureOperation texOp = (TextureOperation)node.Value;

      // We can't query the format of a bindless texture,
      // because the handle is unknown, it can have any format.
      if (texOp.Flags.HasFlag(TextureFlags.Bindless))
      {
          return node;
      }

      int maxPositive;

      switch (config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot))
      {
          case TextureFormat.R8Snorm:
          case TextureFormat.R8G8Snorm:
          case TextureFormat.R8G8B8A8Snorm:
              maxPositive = sbyte.MaxValue;
              break;
          case TextureFormat.R16Snorm:
          case TextureFormat.R16G16Snorm:
          case TextureFormat.R16G16B16A16Snorm:
              maxPositive = short.MaxValue;
              break;
          default:
              // Not a SNORM format, so there's nothing to do here.
              return node;
      }

      // Do normalization. We assume SINT formats are being used
      // as replacement for SNORM (which is not supported).
      for (int destIndex = 0; destIndex < texOp.DestsCount; destIndex++)
      {
          Operand texel = texOp.GetDest(destIndex);

          // The uses are captured before the conversion is created, since it also uses the texel value.
          INode[] texelUses = texel.UseOps.ToArray();

          Operation toFloat = new Operation(Instruction.ConvertS32ToFP32, Local(), texel);
          Operation normalize = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), toFloat.Dest, ConstF(1f / maxPositive));

          node = node.List.AddAfter(node, toFloat);
          node = node.List.AddAfter(node, normalize);

          foreach (INode use in texelUses)
          {
              if (use is Operation useOperation)
              {
                  // Replace all uses of the texture pixel value with the normalized value.
                  for (int srcIndex = 0; srcIndex < useOperation.SourcesCount; srcIndex++)
                  {
                      if (useOperation.GetSource(srcIndex) == texel)
                      {
                          useOperation.SetSource(srcIndex, normalize.Dest);
                      }
                  }
              }
          }
      }

      return node;
  }
  /// <summary>
  /// Inserts a signed 32-bit integer to FP32 conversion of the value before the given node.
  /// </summary>
  /// <param name="node">Node before which the conversion is inserted</param>
  /// <param name="value">Value to convert</param>
  /// <returns>New local operand that receives the converted value</returns>
  private static Operand GenerateI2f(LinkedListNode<INode> node, Operand value)
  {
      Operand converted = Local();
      Operation conversion = new Operation(Instruction.ConvertS32ToFP32, converted, value);

      node.List.AddBefore(node, conversion);

      return converted;
  }
  /// <summary>
  /// Inserts a FP32 to signed 32-bit integer conversion of the value before the given node.
  /// </summary>
  /// <param name="node">Node before which the conversion is inserted</param>
  /// <param name="value">Value to convert</param>
  /// <returns>New local operand that receives the converted value</returns>
  private static Operand GenerateF2i(LinkedListNode<INode> node, Operand value)
  {
      Operand converted = Local();
      Operation conversion = new Operation(Instruction.ConvertFP32ToS32, converted, value);

      node.List.AddBefore(node, conversion);

      return converted;
  }
  /// <summary>
  /// Replaces the sources of the operation that read the base vertex, base instance or draw index
  /// from constant buffer slot 0 with loads of the matching input variable.
  /// </summary>
  /// <param name="node">Node of the operation; the loads are inserted before it</param>
  /// <param name="operation">Operation whose sources are inspected</param>
  /// <returns>True if at least one source was replaced, false otherwise</returns>
  private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation)
  {
      // Inserts a load of the given input variable before the node, and returns the loaded value.
      Operand LoadInput(IoVariable ioVariable)
      {
          Operand loaded = Local();
          node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, loaded, Const((int)ioVariable)));

          return loaded;
      }

      bool replacedAny = false;

      for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
      {
          Operand src = operation.GetSource(srcIndex);

          if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
          {
              continue;
          }

          // The byte offsets are divided by 4 to compare against the constant buffer offset.
          IoVariable? drawParameter = src.GetCbufOffset() switch
          {
              Constants.NvnBaseVertexByteOffset / 4 => IoVariable.BaseVertex,
              Constants.NvnBaseInstanceByteOffset / 4 => IoVariable.BaseInstance,
              Constants.NvnDrawIndexByteOffset / 4 => IoVariable.DrawIndex,
              _ => null,
          };

          if (drawParameter.HasValue)
          {
              operation.SetSource(srcIndex, LoadInput(drawParameter.Value));
              replacedAny = true;
          }
      }

      return replacedAny;
  }
  /// <summary>
  /// Checks if any source of the operation reads the base vertex, base instance or draw index
  /// from constant buffer slot 0.
  /// </summary>
  /// <param name="operation">Operation whose sources are inspected</param>
  /// <returns>True if such a source was found, false otherwise</returns>
  private static bool HasConstantBufferDrawParameters(Operation operation)
  {
      int sourcesCount = operation.SourcesCount;

      for (int srcIndex = 0; srcIndex < sourcesCount; srcIndex++)
      {
          Operand src = operation.GetSource(srcIndex);

          if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
          {
              continue;
          }

          // The byte offsets are divided by 4 to compare against the constant buffer offset.
          int cbufOffset = src.GetCbufOffset();

          if (cbufOffset == Constants.NvnBaseVertexByteOffset / 4 ||
              cbufOffset == Constants.NvnBaseInstanceByteOffset / 4 ||
              cbufOffset == Constants.NvnDrawIndexByteOffset / 4)
          {
              return true;
          }
      }

      return false;
  }
  771. }
  772. }