ShaderConfig.cs 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944
  1. using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  2. using Ryujinx.Graphics.Shader.StructuredIr;
  3. using System;
  4. using System.Collections.Generic;
  5. using System.Linq;
  6. using System.Numerics;
  7. namespace Ryujinx.Graphics.Shader.Translation
  8. {
  9. class ShaderConfig
  10. {
  // TODO: Non-hardcoded array size.
  /// <summary>Number of entries registered for a texture or image whose sampler type has the Indexed flag.</summary>
  public const int SamplerArraySize = 4;

  /// <summary>Divisor applied to the header CRS local memory size when computing <see cref="LocalMemorySize"/>.</summary>
  private const int ThreadsPerWarp = 32;

  /// <summary>Shader stage; compute unless a stage or shader header was passed to the constructor.</summary>
  public ShaderStage Stage { get; }

  /// <summary>True only for a geometry stage whose header enables passthrough.</summary>
  public bool GpPassthrough { get; }

  /// <summary>Initially true for header stages ordered before fragment; updated by the stage merging methods.</summary>
  public bool LastInVertexPipeline { get; private set; }

  /// <summary>Set once <see cref="SetGeometryShaderLayerInputAttribute"/> has been called.</summary>
  public bool HasLayerInputAttribute { get; private set; }

  /// <summary>Attribute recorded by <see cref="SetGeometryShaderLayerInputAttribute"/>.</summary>
  public int GpLayerInputAttribute { get; private set; }

  /// <summary>Taken from the shader header, or 1 when constructed from an explicit stage.</summary>
  public int ThreadsPerInputPrimitive { get; }

  /// <summary>Output topology supplied by the header or the constructor argument.</summary>
  public OutputTopology OutputTopology { get; }

  /// <summary>Maximum output vertex count supplied by the header or the constructor argument.</summary>
  public int MaxOutputVertices { get; }

  /// <summary>Low size + high size + (CRS size / <see cref="ThreadsPerWarp"/>) from the header.</summary>
  public int LocalMemorySize { get; }

  /// <summary>Input map pixel types copied from the header.</summary>
  public ImapPixelType[] ImapTypes { get; }

  /// <summary>Output map target mask copied from the header.</summary>
  public int OmapTargets { get; }

  /// <summary>Output map sample mask flag copied from the header.</summary>
  public bool OmapSampleMask { get; }

  /// <summary>Output map depth flag copied from the header.</summary>
  public bool OmapDepth { get; }

  /// <summary>Accessor used for all GPU state queries, binding queries and logging.</summary>
  public IGpuAccessor GpuAccessor { get; }

  /// <summary>Translation options passed to the constructor.</summary>
  public TranslationOptions Options { get; }

  /// <summary>Result of the transform feedback enabled query made at construction (stage and header constructors only).</summary>
  public bool TransformFeedbackEnabled { get; }

  // Lazily built by EnsureTransformFeedbackInitialized; stays null until then.
  private TransformFeedbackOutput[] _transformFeedbackOutputs;
  /// <summary>
  /// Dictionary key identifying a transform feedback variable by I/O variable, location and component.
  /// </summary>
  readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable>
  {
      public IoVariable IoVariable { get; }
      public int Location { get; }
      public int Component { get; }

      /// <summary>Creates a key; location and component default to 0.</summary>
      public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0)
      {
          Component = component;
          Location = location;
          IoVariable = ioVariable;
      }

      /// <summary>Boxed equality; false for any object that is not a <see cref="TransformFeedbackVariable"/>.</summary>
      public override bool Equals(object other)
      {
          if (other is TransformFeedbackVariable tfbVar)
          {
              return Equals(tfbVar);
          }

          return false;
      }

      /// <summary>Two keys are equal when all three members match.</summary>
      public bool Equals(TransformFeedbackVariable other)
      {
          return (IoVariable, Location, Component) == (other.IoVariable, other.Location, other.Component);
      }

      /// <summary>Packs the variable into the low bits, the location from bit 8 and the component from bit 16.</summary>
      public override int GetHashCode()
      {
          int locationBits = Location << 8;
          int componentBits = Component << 16;

          return (int)IoVariable | locationBits | componentBits;
      }

      public override string ToString() => $"{IoVariable}.{Location}.{Component}";
  }
  // Filled by EnsureTransformFeedbackInitialized; looked up by TryGetTransformFeedbackOutput.
  private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions;

  /// <summary>Running total accumulated through <see cref="SizeAdd"/>.</summary>
  public int Size { get; private set; }

  /// <summary>Bit mask with one bit per index passed to <see cref="SetClipDistanceWritten"/>.</summary>
  public byte ClipDistancesWritten { get; private set; }

  /// <summary>Union of all flags passed to <see cref="SetUsedFeature"/> or inherited from another config.</summary>
  public FeatureFlags UsedFeatures { get; private set; }

  /// <summary>Highest (offset + 4) seen by <see cref="ConstantBuffer1Read"/>.</summary>
  public int Cb1DataSize { get; private set; }

  /// <summary>Set once <see cref="SetLayerOutputAttribute"/> has been called.</summary>
  public bool LayerOutputWritten { get; private set; }

  /// <summary>Attribute recorded by <see cref="SetLayerOutputAttribute"/>.</summary>
  public int LayerOutputAttribute { get; private set; }

  /// <summary>Whether the next stage has the FixedFuncAttr feature flag, as seen by <see cref="MergeFromtNextStage"/>.</summary>
  public bool NextUsesFixedFuncAttributes { get; private set; }

  /// <summary>Bit mask of used input user attribute indices.</summary>
  public int UsedInputAttributes { get; private set; }

  /// <summary>Bit mask of used output user attribute indices.</summary>
  public int UsedOutputAttributes { get; private set; }

  /// <summary>Per-patch input attribute indices added by <see cref="SetInputUserAttributePerPatch"/>.</summary>
  public HashSet<int> UsedInputAttributesPerPatch { get; }

  /// <summary>Per-patch output attribute indices used by this stage or required by the next one.</summary>
  public HashSet<int> UsedOutputAttributesPerPatch { get; }

  /// <summary>The next stage's per-patch input set (same instance), assigned by <see cref="MergeFromtNextStage"/>.</summary>
  public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; }

  /// <summary>For passthrough geometry shaders: next-stage inputs that this stage does not write itself.</summary>
  public int PassthroughAttributes { get; private set; }

  // Input mask of the next stage, stored by MergeOutputUserAttributes.
  private int _nextUsedInputAttributes;

  // Input mask built up by SetInputUserAttribute only.
  private int _thisUsedInputAttributes;

  // Per-patch attribute -> location map shared with the next stage; null until MergeFromtNextStage assigns it.
  private Dictionary<int, int> _perPatchAttributeLocations;

  /// <summary>Copy of the next stage's <see cref="ThisInputAttributesComponents"/>.</summary>
  public UInt128 NextInputAttributesComponents { get; private set; }

  /// <summary>One bit per used input component, at bit position (index * 4 + component).</summary>
  public UInt128 ThisInputAttributesComponents { get; private set; }

  /// <summary>Storage buffer slot mask; all slots by default, replaced by <see cref="SetAccessibleBufferMasks"/>.</summary>
  public int AccessibleStorageBuffersMask { get; private set; }

  /// <summary>Constant buffer slot mask; replaced by <see cref="SetAccessibleBufferMasks"/>.</summary>
  public int AccessibleConstantBuffersMask { get; private set; }

  // Slot bit masks accumulated by SetUsedConstantBuffer / SetUsedStorageBuffer.
  private int _usedConstantBuffers;
  private int _usedStorageBuffers;
  private int _usedStorageBuffersWrite;
  /// <summary>Key under which a used texture or image is tracked.</summary>
  private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format);

  /// <summary>Mutable usage information stored for each <see cref="TextureInfo"/> key.</summary>
  private struct TextureMeta
  {
      // True when Type came from an instruction other than Lod / TextureSize.
      public bool AccurateType;

      public SamplerType Type;

      // Accumulated with bitwise OR when entries are merged.
      public TextureUsageFlags UsageFlags;
  }
  // Textures and images registered through SetUsedTexture, kept in separate tables.
  private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures;
  private readonly Dictionary<TextureInfo, TextureMeta> _usedImages;

  // Descriptor arrays are built on first request and reused afterwards.
  private BufferDescriptor[] _cachedConstantBufferDescriptors;
  private BufferDescriptor[] _cachedStorageBufferDescriptors;
  private TextureDescriptor[] _cachedTextureDescriptors;
  private TextureDescriptor[] _cachedImageDescriptors;

  // Written as "out" arguments when the corresponding buffer descriptors are built.
  private int _firstConstantBufferBinding;
  private int _firstStorageBufferBinding;

  /// <summary>First constant buffer binding; 0 until <see cref="GetConstantBufferDescriptors"/> has run.</summary>
  public int FirstConstantBufferBinding
  {
      get { return _firstConstantBufferBinding; }
  }

  /// <summary>First storage buffer binding; 0 until <see cref="GetStorageBufferDescriptors"/> has run.</summary>
  public int FirstStorageBufferBinding
  {
      get { return _firstStorageBufferBinding; }
  }
  /// <summary>
  /// Creates a compute stage configuration with empty usage tables and all storage and
  /// constant buffer slots marked as accessible.
  /// </summary>
  /// <param name="gpuAccessor">Accessor used for GPU state queries.</param>
  /// <param name="options">Translation options.</param>
  public ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options)
  {
      Stage = ShaderStage.Compute;
      GpuAccessor = gpuAccessor;
      Options = options;

      // One bit per slot, all set.
      int allStorageSlots = (1 << GlobalMemory.StorageMaxCount) - 1;
      int allConstantSlots = (1 << GlobalMemory.UbeMaxCount) - 1;

      AccessibleStorageBuffersMask = allStorageSlots;
      AccessibleConstantBuffersMask = allConstantSlots;

      _transformFeedbackDefinitions = new();
      UsedInputAttributesPerPatch = new();
      UsedOutputAttributesPerPatch = new();
      _usedTextures = new();
      _usedImages = new();
  }
  /// <summary>
  /// Creates a configuration for an explicitly specified stage (no shader header available).
  /// </summary>
  /// <param name="stage">Shader stage.</param>
  /// <param name="outputTopology">Output topology.</param>
  /// <param name="maxOutputVertices">Maximum number of output vertices.</param>
  /// <param name="gpuAccessor">Accessor used for GPU state queries.</param>
  /// <param name="options">Translation options.</param>
  public ShaderConfig(
      ShaderStage stage,
      OutputTopology outputTopology,
      int maxOutputVertices,
      IGpuAccessor gpuAccessor,
      TranslationOptions options) : this(gpuAccessor, options)
  {
      Stage = stage;
      OutputTopology = outputTopology;
      MaxOutputVertices = maxOutputVertices;
      ThreadsPerInputPrimitive = 1;
      TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();

      // Only compute keeps the constant buffer mask set by the base constructor.
      bool isCompute = stage == ShaderStage.Compute;

      if (!isCompute)
      {
          AccessibleConstantBuffersMask = 0;
      }
  }
  /// <summary>
  /// Creates a configuration from the values stored in a shader header.
  /// </summary>
  /// <param name="header">Header of the shader being translated.</param>
  /// <param name="gpuAccessor">Accessor used for GPU state queries.</param>
  /// <param name="options">Translation options.</param>
  public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(gpuAccessor, options)
  {
      ShaderStage stage = header.Stage;

      Stage = stage;
      GpPassthrough = stage == ShaderStage.Geometry && header.GpPassthrough;
      ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive;
      OutputTopology = header.OutputTopology;
      MaxOutputVertices = header.MaxOutputVertexCount;

      // The CRS size is divided by the warp size before being added to the low and high sizes.
      int crsShare = header.ShaderLocalMemoryCrsSize / ThreadsPerWarp;
      LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + crsShare;

      ImapTypes = header.ImapTypes;
      OmapTargets = header.OmapTargets;
      OmapSampleMask = header.OmapSampleMask;
      OmapDepth = header.OmapDepth;
      TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();

      // Every stage ordered before fragment starts out as "last"; MergeFromtNextStage may clear it.
      LastInVertexPipeline = stage < ShaderStage.Fragment;
  }
  /// <summary>
  /// Builds the per-word transform feedback output table and the variable lookup dictionary
  /// on first use. Does nothing when this stage has no transform feedback outputs or when the
  /// table already exists.
  /// </summary>
  private void EnsureTransformFeedbackInitialized()
  {
      if (!HasTransformFeedbackOutputs() || _transformFeedbackOutputs != null)
      {
          return;
      }

      var outputs = new TransformFeedbackOutput[0xc0];

      // One bit per group of 4 words that received at least one output.
      ulong pendingVectors = 0UL;

      for (int bufferIndex = 0; bufferIndex < 4; bufferIndex++)
      {
          var locations = GpuAccessor.QueryTransformFeedbackVaryingLocations(bufferIndex);
          var stride = GpuAccessor.QueryTransformFeedbackStride(bufferIndex);

          for (int entry = 0; entry < locations.Length; entry++)
          {
              byte wordOffset = locations[entry];

              // Offsets outside of the table are ignored.
              if (wordOffset >= 0xc0)
              {
                  continue;
              }

              outputs[wordOffset] = new TransformFeedbackOutput(bufferIndex, entry * 4, stride);
              pendingVectors |= 1UL << (wordOffset / 4);
          }
      }

      // The table must be published before the second pass: HasPerLocationInputOrOutputComponent
      // re-enters this method through GetTransformFeedbackOutputComponents and reads the table.
      _transformFeedbackOutputs = outputs;

      for (; pendingVectors != 0; pendingVectors &= pendingVectors - 1)
      {
          int firstWord = BitOperations.TrailingZeroCount(pendingVectors) * 4;

          for (int subIndex = 0; subIndex < 4; subIndex++)
          {
              int wordOffset = firstWord + subIndex;

              if (!outputs[wordOffset].Valid)
              {
                  continue;
              }

              IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, wordOffset * 4, out int location);

              bool perComponent = HasPerLocationInputOrOutputComponent(ioVariable, location, subIndex, isOutput: true);
              int component = perComponent ? subIndex : 0;

              // First definition wins when several words map to the same variable.
              var key = new TransformFeedbackVariable(ioVariable, location, component);
              _transformFeedbackDefinitions.TryAdd(key, outputs[wordOffset]);
          }
      }
  }
  /// <summary>
  /// Gets the per-word transform feedback output table, building it first if needed.
  /// </summary>
  /// <returns>The table, or null when this stage has no transform feedback outputs.</returns>
  public TransformFeedbackOutput[] GetTransformFeedbackOutputs()
  {
      EnsureTransformFeedbackInitialized();

      TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;

      return outputs;
  }
  /// <summary>
  /// Looks up the transform feedback output defined for a variable.
  /// </summary>
  /// <param name="ioVariable">I/O variable.</param>
  /// <param name="location">Variable location.</param>
  /// <param name="component">Variable component.</param>
  /// <param name="transformFeedbackOutput">The matching output when found.</param>
  /// <returns>True if a definition exists for the variable.</returns>
  public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput)
  {
      EnsureTransformFeedbackInitialized();

      TransformFeedbackVariable key = new(ioVariable, location, component);
      bool found = _transformFeedbackDefinitions.TryGetValue(key, out transformFeedbackOutput);

      return found;
  }
  /// <summary>
  /// Checks whether transform feedback is enabled and this stage is either the last of the
  /// vertex pipeline or the fragment stage.
  /// </summary>
  private bool HasTransformFeedbackOutputs()
  {
      if (!TransformFeedbackEnabled)
      {
          return false;
      }

      return LastInVertexPipeline || Stage == ShaderStage.Fragment;
  }
  /// <summary>
  /// Checks whether transform feedback applies to the inputs or the outputs of this stage.
  /// </summary>
  /// <param name="isOutput">True to query outputs (last vertex pipeline stage), false to query inputs (fragment stage).</param>
  /// <returns>True if transform feedback is enabled and applies to the requested direction.</returns>
  public bool HasTransformFeedbackOutputs(bool isOutput)
  {
      if (!TransformFeedbackEnabled)
      {
          return false;
      }

      return isOutput ? LastInVertexPipeline : Stage == ShaderStage.Fragment;
  }
  /// <summary>
  /// Checks whether an I/O variable is handled per location.
  /// </summary>
  /// <param name="ioVariable">I/O variable.</param>
  /// <param name="isOutput">True for outputs, false for inputs.</param>
  /// <returns>
  /// For user defined variables, true unless the matching indexing feature (IaIndexing for inputs,
  /// OaIndexing for outputs) is in use; otherwise true only for fragment output colors.
  /// </returns>
  public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput)
  {
      if (ioVariable != IoVariable.UserDefined)
      {
          return ioVariable == IoVariable.FragmentOutputColor;
      }

      FeatureFlags indexingFlag = isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing;

      return !UsedFeatures.HasFlag(indexingFlag);
  }
  /// <summary>
  /// Checks whether a user defined variable component is handled on its own, which is the case
  /// when transform feedback applies and the component count reported for it is 1.
  /// </summary>
  /// <param name="ioVariable">I/O variable.</param>
  /// <param name="location">Variable location.</param>
  /// <param name="component">Variable component.</param>
  /// <param name="isOutput">True for outputs, false for inputs.</param>
  /// <returns>True if the component is handled individually.</returns>
  public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput)
  {
      return ioVariable == IoVariable.UserDefined &&
             HasTransformFeedbackOutputs(isOutput) &&
             GetTransformFeedbackOutputComponents(location, component) == 1;
  }
  /// <summary>
  /// Gets the transform feedback output stored at a word offset of the output table.
  /// </summary>
  /// <param name="wordOffset">Index into the output table.</param>
  /// <returns>The table entry at that offset.</returns>
  public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset)
  {
      EnsureTransformFeedbackInitialized();

      TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;

      return outputs[wordOffset];
  }
  /// <summary>
  /// Gets the transform feedback output of a user attribute component.
  /// </summary>
  /// <param name="location">User attribute location.</param>
  /// <param name="component">Component within the location.</param>
  /// <returns>The table entry for that component.</returns>
  public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component)
  {
      // User attributes start at word (UserAttributeBase / 4); each location spans 4 words.
      int firstUserWord = AttributeConsts.UserAttributeBase / 4;
      int wordOffset = firstUserWord + location * 4 + component;

      return GetTransformFeedbackOutput(wordOffset);
  }
  /// <summary>
  /// Counts how many components, starting at the first component of a user attribute location,
  /// are valid transform feedback outputs with offsets that advance by 4 each.
  /// </summary>
  /// <param name="location">User attribute location.</param>
  /// <param name="component">Component being queried.</param>
  /// <returns>The length of that run, or 1 when <paramref name="component"/> lies beyond it.</returns>
  public int GetTransformFeedbackOutputComponents(int location, int component)
  {
      EnsureTransformFeedbackInitialized();

      int vectorBase = (AttributeConsts.UserAttributeBase / 4) + location * 4;
      int runLength = 1;

      while (runLength < 4)
      {
          ref var previous = ref _transformFeedbackOutputs[vectorBase + runLength - 1];
          ref var current = ref _transformFeedbackOutputs[vectorBase + runLength];

          int expectedOffset = previous.Offset + 4;
          int actualOffset = current.Offset;

          bool continuesRun = previous.Valid && current.Valid && expectedOffset == actualOffset;

          if (!continuesRun)
          {
              break;
          }

          runLength++;
      }

      // A component past the end of the run is not part of it and counts as a single component.
      return component >= runLength ? 1 : runLength;
  }
  /// <summary>
  /// Gets the type of a fragment color output: a 4 component vector of the element type
  /// reported by the GPU accessor for the location.
  /// </summary>
  /// <param name="location">Fragment output location.</param>
  /// <returns>The aggregate type of the output.</returns>
  public AggregateType GetFragmentOutputColorType(int location)
  {
      AggregateType elementType = GpuAccessor.QueryFragmentOutputType(location).ToAggregateType();

      return AggregateType.Vector4 | elementType;
  }
  /// <summary>
  /// Gets the type of a user defined input or output.
  /// </summary>
  /// <param name="location">User attribute location.</param>
  /// <param name="isOutput">True for outputs, false for inputs.</param>
  /// <returns>
  /// An array of FP32 4 component vectors when the matching indexing feature is in use; otherwise a
  /// 4 component vector whose element type is queried for vertex inputs and FP32 for everything else.
  /// </returns>
  public AggregateType GetUserDefinedType(int location, bool isOutput)
  {
      FeatureFlags indexingFlag = isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing;

      if (UsedFeatures.HasFlag(indexingFlag))
      {
          return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32;
      }

      bool isVertexInput = Stage == ShaderStage.Vertex && !isOutput;

      AggregateType elementType = isVertexInput
          ? GpuAccessor.QueryAttributeType(location).ToAggregateType()
          : AggregateType.FP32;

      return AggregateType.Vector4 | elementType;
  }
  /// <summary>
  /// Gets the index of the register holding the fragment depth output.
  /// </summary>
  /// <returns>The number of bits set in <see cref="OmapTargets"/>, plus one.</returns>
  public int GetDepthRegister()
  {
      int colorComponentCount = BitOperations.PopCount((uint)OmapTargets);

      // The depth register is always two registers after the last color output.
      return colorComponentCount + 1;
  }
  /// <summary>
  /// Reads a value from constant buffer 1 through the GPU accessor, growing
  /// <see cref="Cb1DataSize"/> so that it covers the 4 bytes read.
  /// </summary>
  /// <param name="offset">Offset passed to the accessor.</param>
  /// <returns>The value returned by the accessor.</returns>
  public uint ConstantBuffer1Read(int offset)
  {
      int requiredSize = offset + 4;

      Cb1DataSize = Math.Max(Cb1DataSize, requiredSize);

      return GpuAccessor.ConstantBuffer1Read(offset);
  }
  /// <summary>
  /// Gets the format to declare for an image.
  /// </summary>
  /// <param name="handle">Texture handle.</param>
  /// <param name="cbufSlot">Constant buffer slot of the handle; -1 by default.</param>
  /// <returns>
  /// Unknown when the host supports formatted image loads; otherwise the queried format, with
  /// R8G8B8A8Unorm substituted (and a message logged) when the query returns Unknown.
  /// </returns>
  public TextureFormat GetTextureFormat(int handle, int cbufSlot = -1)
  {
      // When the formatted load extension is supported, we don't need to
      // specify a format, we can just declare it without a format and the GPU will handle it.
      bool formatIsOptional = GpuAccessor.QueryHostSupportsImageLoadFormatted();

      if (formatIsOptional)
      {
          return TextureFormat.Unknown;
      }

      TextureFormat queried = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

      if (queried != TextureFormat.Unknown)
      {
          return queried;
      }

      GpuAccessor.Log($"Unknown format for texture {handle}.");

      return TextureFormat.R8G8B8A8Unorm;
  }
  /// <summary>
  /// Checks whether a format is accepted for atomic image operations (R32Sint or R32Uint).
  /// </summary>
  private static bool FormatSupportsAtomic(TextureFormat format)
  {
      return format is TextureFormat.R32Sint or TextureFormat.R32Uint;
  }
  /// <summary>
  /// Gets the format to declare for an image used by atomic instructions.
  /// </summary>
  /// <param name="handle">Texture handle.</param>
  /// <param name="cbufSlot">Constant buffer slot of the handle; -1 by default.</param>
  /// <returns>The queried format when it supports atomics; otherwise R32Sint, after logging a message.</returns>
  public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1)
  {
      // Atomic image instructions do not support GL_EXT_shader_image_load_formatted,
      // and must have a type specified. Default to R32Sint if not available.
      TextureFormat queried = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

      if (FormatSupportsAtomic(queried))
      {
          return queried;
      }

      GpuAccessor.Log($"Unsupported format for texture {handle}: {queried}.");

      return TextureFormat.R32Sint;
  }
  /// <summary>Adds <paramref name="size"/> to <see cref="Size"/>.</summary>
  public void SizeAdd(int size) => Size += size;
  /// <summary>
  /// Merges the usage information of another configuration into this one: clip distances,
  /// feature flags, attribute and buffer masks are combined with bitwise OR, and the texture
  /// and image tables are merged entry by entry.
  /// </summary>
  /// <param name="other">Configuration to take usage information from.</param>
  public void InheritFrom(ShaderConfig other)
  {
      ClipDistancesWritten |= other.ClipDistancesWritten;
      UsedFeatures |= other.UsedFeatures;

      UsedInputAttributes |= other.UsedInputAttributes;
      UsedOutputAttributes |= other.UsedOutputAttributes;

      _usedConstantBuffers |= other._usedConstantBuffers;
      _usedStorageBuffers |= other._usedStorageBuffers;
      _usedStorageBuffersWrite |= other._usedStorageBuffersWrite;

      MergeInto(_usedTextures, other._usedTextures);
      MergeInto(_usedImages, other._usedImages);

      // Copies new entries as they are; entries present on both sides are combined.
      static void MergeInto(Dictionary<TextureInfo, TextureMeta> target, Dictionary<TextureInfo, TextureMeta> source)
      {
          foreach (var (info, incoming) in source)
          {
              if (target.TryGetValue(info, out TextureMeta existing))
              {
                  target[info] = MergeTextureMeta(incoming, existing);
              }
              else
              {
                  target.Add(info, incoming);
              }
          }
      }
  }
  /// <summary>Records the attribute that carries the layer output and flags it as written.</summary>
  /// <param name="attr">Layer output attribute.</param>
  public void SetLayerOutputAttribute(int attr)
  {
      LayerOutputAttribute = attr;
      LayerOutputWritten = true;
  }
  /// <summary>Records the layer input attribute and flags it as present.</summary>
  /// <param name="attr">Layer input attribute.</param>
  public void SetGeometryShaderLayerInputAttribute(int attr)
  {
      GpLayerInputAttribute = attr;
      HasLayerInputAttribute = true;
  }
  /// <summary>Marks this stage as the last one of the vertex pipeline.</summary>
  public void SetLastInVertexPipeline() => LastInVertexPipeline = true;
  /// <summary>Marks an input user attribute as used without recording per-component usage.</summary>
  /// <param name="index">User attribute index.</param>
  public void SetInputUserAttributeFixedFunc(int index)
  {
      int attributeBit = 1 << index;

      UsedInputAttributes |= attributeBit;
  }
  /// <summary>Marks an output user attribute as used.</summary>
  /// <param name="index">User attribute index.</param>
  public void SetOutputUserAttributeFixedFunc(int index)
  {
      int attributeBit = 1 << index;

      UsedOutputAttributes |= attributeBit;
  }
  /// <summary>
  /// Marks an input user attribute component as used, updating both attribute masks and the
  /// per-component mask.
  /// </summary>
  /// <param name="index">User attribute index.</param>
  /// <param name="component">Component within the attribute.</param>
  public void SetInputUserAttribute(int index, int component)
  {
      int attributeBit = 1 << index;
      int componentPosition = index * 4 + component;

      UsedInputAttributes |= attributeBit;
      _thisUsedInputAttributes |= attributeBit;
      ThisInputAttributesComponents |= UInt128.One << componentPosition;
  }
  /// <summary>Adds an index to the set of used per-patch input attributes.</summary>
  /// <param name="index">Per-patch attribute index.</param>
  public void SetInputUserAttributePerPatch(int index) => UsedInputAttributesPerPatch.Add(index);
  /// <summary>Marks an output user attribute as used.</summary>
  /// <param name="index">User attribute index.</param>
  public void SetOutputUserAttribute(int index)
  {
      int attributeBit = 1 << index;

      UsedOutputAttributes |= attributeBit;
  }
  /// <summary>Adds an index to the set of used per-patch output attributes.</summary>
  /// <param name="index">Per-patch attribute index.</param>
  public void SetOutputUserAttributePerPatch(int index) => UsedOutputAttributesPerPatch.Add(index);
  /// <summary>
  /// Takes the input usage of the next stage into account: copies its input information, merges
  /// its inputs into this stage's outputs, assigns locations to per-patch outputs and updates
  /// <see cref="LastInVertexPipeline"/>.
  /// </summary>
  /// <param name="config">Configuration of the stage that follows this one.</param>
  public void MergeFromtNextStage(ShaderConfig config)
  {
      NextInputAttributesComponents = config.ThisInputAttributesComponents;
      NextUsedInputAttributesPerPatch = config.UsedInputAttributesPerPatch;
      NextUsesFixedFuncAttributes = config.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr);

      // Must run before the free location mask below is computed, as it can add used outputs.
      MergeOutputUserAttributes(config.UsedInputAttributes, config.UsedInputAttributesPerPatch);

      if (UsedOutputAttributesPerPatch.Count != 0)
      {
          // Regular and per-patch input/output locations can't overlap,
          // so we must assign on our location using unused regular input/output locations.
          var assignedLocations = new Dictionary<int, int>();
          int availableMask = ~UsedOutputAttributes;

          foreach (int attr in UsedOutputAttributesPerPatch)
          {
              if (availableMask == 0)
              {
                  // Attributes that did not get a location keep their own index (see GetPerPatchAttributeLocation).
                  config.GpuAccessor.Log($"No enough free locations for patch input/output 0x{attr:X}.");
                  break;
              }

              int location = BitOperations.TrailingZeroCount(availableMask);

              assignedLocations.Add(attr, location);
              availableMask &= ~(1 << location);
          }

          // Both stages must agree on the locations, so use the same "map" for both.
          config._perPatchAttributeLocations = assignedLocations;
          _perPatchAttributeLocations = assignedLocations;
      }

      // We don't consider geometry shaders using the geometry shader passthrough feature
      // as being the last because when this feature is used, it can't actually modify any of the outputs,
      // so the stage that comes before it is the last one that can do modifications.
      bool nextIsFragment = config.Stage == ShaderStage.Fragment;
      bool nextIsPassthroughGeometry = config.Stage == ShaderStage.Geometry && config.GpPassthrough;

      if (!nextIsFragment && !nextIsPassthroughGeometry)
      {
          LastInVertexPipeline = false;
      }
  }
  /// <summary>
  /// Records the inputs used by the next stage. A passthrough geometry shader only remembers
  /// which of them it does not write itself; any other stage adds them to its used outputs.
  /// </summary>
  /// <param name="mask">Bit mask of input user attributes used by the next stage.</param>
  /// <param name="perPatch">Per-patch input attributes used by the next stage.</param>
  public void MergeOutputUserAttributes(int mask, IEnumerable<int> perPatch)
  {
      _nextUsedInputAttributes = mask;

      if (!GpPassthrough)
      {
          UsedOutputAttributes |= mask;
          UsedOutputAttributesPerPatch.UnionWith(perPatch);

          return;
      }

      PassthroughAttributes = mask & ~UsedOutputAttributes;
  }
  /// <summary>
  /// Gets the location assigned to a per-patch attribute.
  /// </summary>
  /// <param name="index">Per-patch attribute index.</param>
  /// <returns>The mapped location, or <paramref name="index"/> itself when no mapping exists for it.</returns>
  public int GetPerPatchAttributeLocation(int index)
  {
      Dictionary<int, int> map = _perPatchAttributeLocations;

      if (map != null && map.TryGetValue(index, out int location))
      {
          return location;
      }

      return index;
  }
  /// <summary>
  /// Checks whether an output attribute should be considered used.
  /// </summary>
  /// <param name="attr">Attribute offset.</param>
  /// <returns>
  /// True unless the next stage uses fixed function attributes, the offset is inside the user
  /// attribute range and the next stage does not read the corresponding user attribute.
  /// </returns>
  public bool IsUsedOutputAttribute(int attr)
  {
      // The check for fixed function attributes on the next stage is conservative,
      // returning false if the output is just not used by the next stage is also valid.
      if (!NextUsesFixedFuncAttributes)
      {
          return true;
      }

      bool isUserAttribute = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd;

      if (!isUserAttribute)
      {
          return true;
      }

      // Each user attribute occupies 16 bytes of attribute space.
      int index = (attr - AttributeConsts.UserAttributeBase) >> 4;
      int attributeBit = 1 << index;

      return (_nextUsedInputAttributes & attributeBit) != 0;
  }
  /// <summary>
  /// Finds the user attribute location that is the <paramref name="index"/>-th free one
  /// (zero based) according to the relevant usage mask.
  /// </summary>
  /// <remarks>
  /// The previous implementation could never reach its "no location left" branch, so when fewer
  /// than <paramref name="index"/> + 1 locations were free it returned the last free location
  /// found, which had already been returned for a lower index. Exhaustion now yields -1.
  /// </remarks>
  /// <param name="isOutput">True to search the inputs used by the next stage, false to search the inputs used by this stage.</param>
  /// <param name="index">How many free locations to skip; values below 1 select the first free location.</param>
  /// <returns>The free location, or -1 when there are not enough free locations.</returns>
  public int GetFreeUserAttribute(bool isOutput, int index)
  {
      int useMask = isOutput ? _nextUsedInputAttributes : _thisUsedInputAttributes;

      // Mark the first "index" free locations as taken, so the lowest one left is the answer.
      for (; index > 0 && useMask != -1; index--)
      {
          useMask |= 1 << BitOperations.TrailingZeroCount(~useMask);
      }

      // All 32 locations are either in use or were skipped: nothing left to hand out.
      if (useMask == -1)
      {
          return -1;
      }

      return BitOperations.TrailingZeroCount(~useMask);
  }
  /// <summary>Marks every input user attribute, and every component of each, as used.</summary>
  public void SetAllInputUserAttributes()
  {
      // Keep only the low (MaxAttributes * 4) bits set: one bit per component.
      int unusedBits = 128 - Constants.MaxAttributes * 4;
      UInt128 allComponents = ~UInt128.Zero >> unusedBits;

      UsedInputAttributes |= Constants.AllAttributesMask;
      ThisInputAttributesComponents |= allComponents;
  }
  /// <summary>Marks every output user attribute as used.</summary>
  public void SetAllOutputUserAttributes() => UsedOutputAttributes |= Constants.AllAttributesMask;
  /// <summary>Sets the bit for a clip distance in <see cref="ClipDistancesWritten"/>.</summary>
  /// <param name="index">Clip distance index.</param>
  public void SetClipDistanceWritten(int index)
  {
      byte distanceBit = (byte)(1 << index);

      ClipDistancesWritten |= distanceBit;
  }
  /// <summary>Adds flags to <see cref="UsedFeatures"/>.</summary>
  /// <param name="flags">Feature flags to add.</param>
  public void SetUsedFeature(FeatureFlags flags) => UsedFeatures |= flags;
  /// <summary>Replaces both accessible buffer masks.</summary>
  /// <param name="sbMask">New storage buffer mask.</param>
  /// <param name="ubeMask">New constant buffer mask.</param>
  public void SetAccessibleBufferMasks(int sbMask, int ubeMask)
  {
      AccessibleConstantBuffersMask = ubeMask;
      AccessibleStorageBuffersMask = sbMask;
  }
  /// <summary>Marks a constant buffer slot as used.</summary>
  /// <param name="slot">Constant buffer slot.</param>
  public void SetUsedConstantBuffer(int slot)
  {
      int slotBit = 1 << slot;

      _usedConstantBuffers |= slotBit;
  }
  /// <summary>Marks a storage buffer slot as used, and as written when requested.</summary>
  /// <param name="slot">Storage buffer slot.</param>
  /// <param name="write">True if the buffer is written to.</param>
  public void SetUsedStorageBuffer(int slot, bool write)
  {
      int slotBit = 1 << slot;

      _usedStorageBuffers |= slotBit;
      _usedStorageBuffersWrite |= write ? slotBit : 0;
  }
  /// <summary>
  /// Registers a texture or image used by an instruction, then notifies the GPU accessor.
  /// ImageLoad, ImageStore and ImageAtomic go to the image table; everything else goes to the
  /// texture table with an unknown format.
  /// </summary>
  /// <param name="inst">Instruction accessing the texture; only the bits inside Instruction.Mask are considered.</param>
  /// <param name="type">Sampler type.</param>
  /// <param name="format">Image format (ignored for textures).</param>
  /// <param name="flags">Texture flags of the operation.</param>
  /// <param name="cbufSlot">Constant buffer slot of the handle.</param>
  /// <param name="handle">Texture handle.</param>
  public void SetUsedTexture(
      Instruction inst,
      SamplerType type,
      TextureFormat format,
      TextureFlags flags,
      int cbufSlot,
      int handle)
  {
      Instruction operation = inst & Instruction.Mask;

      bool writes = operation is Instruction.ImageStore or Instruction.ImageAtomic;
      bool usesImage = writes || operation == Instruction.ImageLoad;
      bool isCoherent = flags.HasFlag(TextureFlags.Coherent);

      if (usesImage)
      {
          SetUsedTextureOrImage(_usedImages, cbufSlot, handle, type, format, true, writes, false, isCoherent);
      }
      else
      {
          bool isSizeQuery = operation == Instruction.TextureSize;

          // Lod and TextureSize do not provide reliable sampler type information.
          bool typeIsAccurate = operation != Instruction.Lod && !isSizeQuery;
          bool usesIntCoords = flags.HasFlag(TextureFlags.IntCoords) || isSizeQuery;

          SetUsedTextureOrImage(_usedTextures, cbufSlot, handle, type, TextureFormat.Unknown, usesIntCoords, false, typeIsAccurate, isCoherent);
      }

      GpuAccessor.RegisterTexture(handle, cbufSlot);
  }
  /// <summary>
  /// Adds a texture or image to a usage table, merging with any entry already stored under the
  /// same key. Indexed sampler types register <see cref="SamplerArraySize"/> entries, with
  /// handles 2 apart.
  /// </summary>
  /// <param name="dict">Table to update.</param>
  /// <param name="cbufSlot">Constant buffer slot of the handle.</param>
  /// <param name="handle">Texture handle (first handle for indexed types).</param>
  /// <param name="type">Sampler type.</param>
  /// <param name="format">Format stored in the key.</param>
  /// <param name="intCoords">True if the access uses integer coordinates.</param>
  /// <param name="write">True if the access writes to the image.</param>
  /// <param name="accurateType">True if <paramref name="type"/> is reliable.</param>
  /// <param name="coherent">True if the access is coherent.</param>
  private void SetUsedTextureOrImage(
      Dictionary<TextureInfo, TextureMeta> dict,
      int cbufSlot,
      int handle,
      SamplerType type,
      TextureFormat format,
      bool intCoords,
      bool write,
      bool accurateType,
      bool coherent)
  {
      var dimensions = type.GetDimensions();
      bool indexed = type.HasFlag(SamplerType.Indexed);

      TextureUsageFlags usage = TextureUsageFlags.None;

      if (intCoords)
      {
          usage |= TextureUsageFlags.NeedsScaleValue;

          bool scalable = Stage.SupportsRenderScale() && !indexed && !write && dimensions == 2;

          if (!scalable)
          {
              // Resolution scaling cannot be applied to this texture right now.
              // Flag so that we know to blacklist scaling on related textures when binding them.
              usage |= TextureUsageFlags.ResScaleUnsupported;
          }
      }

      if (write)
      {
          usage |= TextureUsageFlags.ImageStore;
      }

      if (coherent)
      {
          usage |= TextureUsageFlags.ImageCoherent;
      }

      // Same value for every entry; TextureMeta is a struct, so each use below works on a copy.
      TextureMeta incoming = new()
      {
          AccurateType = accurateType,
          Type = type,
          UsageFlags = usage
      };

      int entryCount = indexed ? SamplerArraySize : 1;

      for (int entry = 0; entry < entryCount; entry++)
      {
          TextureInfo key = new(cbufSlot, handle + entry * 2, indexed, format);

          dict[key] = dict.TryGetValue(key, out TextureMeta existing)
              ? MergeTextureMeta(incoming, existing)
              : incoming;
      }
  }
  /// <summary>
  /// Combines two usage records for the same texture: usage flags are ORed together and the
  /// type of the existing record wins whenever it is marked as accurate.
  /// </summary>
  /// <param name="meta">New record.</param>
  /// <param name="existingMeta">Record already stored.</param>
  /// <returns>The combined record.</returns>
  private static TextureMeta MergeTextureMeta(TextureMeta meta, TextureMeta existingMeta)
  {
      // If the texture we have has inaccurate type information, then
      // we prefer the most accurate one.
      bool keepExistingType = existingMeta.AccurateType;

      return new TextureMeta
      {
          AccurateType = keepExistingType || meta.AccurateType,
          Type = keepExistingType ? existingMeta.Type : meta.Type,
          UsageFlags = meta.UsageFlags | existingMeta.UsageFlags
      };
  }
  /// <summary>
  /// Gets the descriptors of all used constant buffers, building and caching them on first call.
  /// When constant buffer indexing is in use, the buffers reported by the GPU accessor are
  /// included as well and the buffers are treated as an array.
  /// </summary>
  /// <returns>The cached descriptor array.</returns>
  public BufferDescriptor[] GetConstantBufferDescriptors()
  {
      if (_cachedConstantBufferDescriptors == null)
      {
          bool indexed = UsedFeatures.HasFlag(FeatureFlags.CbIndexing);
          int slots = _usedConstantBuffers;

          if (indexed)
          {
              slots |= (int)GpuAccessor.QueryConstantBufferUse();
          }

          _cachedConstantBufferDescriptors = GetBufferDescriptors(
              slots,
              0,
              indexed,
              out _firstConstantBufferBinding,
              GpuAccessor.QueryBindingConstantBuffer);
      }

      return _cachedConstantBufferDescriptors;
  }
  /// <summary>
  /// Gets the descriptors of all used storage buffers, building and caching them on first call.
  /// Storage buffers are always treated as an array.
  /// </summary>
  /// <returns>The cached descriptor array.</returns>
  public BufferDescriptor[] GetStorageBufferDescriptors()
  {
      if (_cachedStorageBufferDescriptors == null)
      {
          _cachedStorageBufferDescriptors = GetBufferDescriptors(
              _usedStorageBuffers,
              _usedStorageBuffersWrite,
              true,
              out _firstStorageBufferBinding,
              GpuAccessor.QueryBindingStorageBuffer);
      }

      return _cachedStorageBufferDescriptors;
  }
  /// <summary>
  /// Creates one descriptor per bit set in <paramref name="usedMask"/>, in ascending slot order.
  /// </summary>
  /// <param name="usedMask">Bit mask of used slots.</param>
  /// <param name="writtenMask">Bit mask of slots that get the Write flag.</param>
  /// <param name="isArray">When true, a binding is also requested for every unused slot below a used one.</param>
  /// <param name="firstBinding">First binding returned by the callback, or 0 if it was never called.</param>
  /// <param name="getBindingCallback">Returns the binding for a slot; called in ascending slot order.</param>
  /// <returns>The descriptors of the used slots.</returns>
  private static BufferDescriptor[] GetBufferDescriptors(
      int usedMask,
      int writtenMask,
      bool isArray,
      out int firstBinding,
      Func<int, int> getBindingCallback)
  {
      int? first = null;
      int pending = usedMask;
      int previousSlot = -1;

      var result = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)];

      for (int n = 0; n < result.Length; n++)
      {
          int slot = BitOperations.TrailingZeroCount(pending);
          int slotBit = 1 << slot;

          if (isArray)
          {
              // The next array entries also consumes bindings, even if they are unused.
              for (int gap = previousSlot + 1; gap < slot; gap++)
              {
                  // The callback has to run for every gap slot, whether or not its result is kept.
                  int gapBinding = getBindingCallback(gap);

                  first ??= gapBinding;
              }
          }

          previousSlot = slot;

          result[n] = new BufferDescriptor(getBindingCallback(slot), slot);
          first ??= result[n].Binding;

          if ((writtenMask & slotBit) != 0)
          {
              result[n].SetFlag(BufferUsageFlags.Write);
          }

          pending &= ~slotBit;
      }

      firstBinding = first ?? 0;

      return result;
  }
  /// <summary>Gets the descriptors of all used textures, building and caching them on first call.</summary>
  /// <returns>The cached descriptor array.</returns>
  public TextureDescriptor[] GetTextureDescriptors()
  {
      if (_cachedTextureDescriptors == null)
      {
          _cachedTextureDescriptors = GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture);
      }

      return _cachedTextureDescriptors;
  }
  /// <summary>Gets the descriptors of all used images, building and caching them on first call.</summary>
  /// <returns>The cached descriptor array.</returns>
  public TextureDescriptor[] GetImageDescriptors()
  {
      if (_cachedImageDescriptors == null)
      {
          _cachedImageDescriptors = GetTextureOrImageDescriptors(_usedImages, GpuAccessor.QueryBindingImage);
      }

      return _cachedImageDescriptors;
  }
  /// <summary>
  /// Creates the descriptors for a texture or image usage table, ordered by handle and then by
  /// the indexed flag (non-indexed first), requesting one binding per entry in that order.
  /// </summary>
  /// <remarks>
  /// The ordering used to be expressed as two chained OrderBy calls, which sorts the sequence
  /// twice and only yields this order because the sort is stable. OrderBy followed by ThenBy
  /// produces the same sequence with a single sort and states the key priority explicitly.
  /// </remarks>
  /// <param name="dict">Usage table to convert.</param>
  /// <param name="getBindingCallback">Returns the binding for an entry, given its index and whether it is a buffer texture.</param>
  /// <returns>One descriptor per table entry.</returns>
  private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary<TextureInfo, TextureMeta> dict, Func<int, bool, int> getBindingCallback)
  {
      var descriptors = new TextureDescriptor[dict.Count];

      int i = 0;

      foreach (var kv in dict.OrderBy(x => x.Key.Handle).ThenBy(x => x.Key.Indexed))
      {
          var info = kv.Key;
          var meta = kv.Value;

          bool isBuffer = (meta.Type & SamplerType.Mask) == SamplerType.TextureBuffer;
          int binding = getBindingCallback(i, isBuffer);

          descriptors[i] = new TextureDescriptor(binding, meta.Type, info.Format, info.CbufSlot, info.Handle);
          descriptors[i].SetFlag(meta.UsageFlags);

          i++;
      }

      return descriptors;
  }
  /// <summary>
  /// Finds the first texture descriptor whose constant buffer slot, handle and format match a
  /// texture operation. The sampler type is not compared.
  /// </summary>
  /// <param name="texOp">Texture operation to match.</param>
  /// <returns>The descriptor and its index, or a default descriptor and -1 when nothing matches.</returns>
  public (TextureDescriptor, int) FindTextureDescriptor(AstTextureOperation texOp)
  {
      TextureDescriptor[] descriptors = GetTextureDescriptors();

      int index = Array.FindIndex(descriptors, candidate =>
          candidate.CbufSlot == texOp.CbufSlot &&
          candidate.HandleIndex == texOp.Handle &&
          candidate.Format == texOp.Format);

      if (index < 0)
      {
          return (default, -1);
      }

      return (descriptors[index], index);
  }
  /// <summary>
  /// Finds the index of the first descriptor whose type, constant buffer slot, handle and
  /// format all match a texture operation.
  /// </summary>
  /// <param name="array">Descriptors to search.</param>
  /// <param name="texOp">Texture operation to match.</param>
  /// <returns>The index of the match, or -1 when nothing matches.</returns>
  private static int FindDescriptorIndex(TextureDescriptor[] array, AstTextureOperation texOp)
  {
      return Array.FindIndex(array, candidate =>
          candidate.Type == texOp.Type &&
          candidate.CbufSlot == texOp.CbufSlot &&
          candidate.HandleIndex == texOp.Handle &&
          candidate.Format == texOp.Format);
  }
  /// <summary>Finds the index of the texture descriptor matching a texture operation.</summary>
  /// <param name="texOp">Texture operation to match.</param>
  /// <returns>The descriptor index, or -1 when nothing matches.</returns>
  public int FindTextureDescriptorIndex(AstTextureOperation texOp)
  {
      TextureDescriptor[] textures = GetTextureDescriptors();

      return FindDescriptorIndex(textures, texOp);
  }
  /// <summary>Finds the index of the image descriptor matching a texture operation.</summary>
  /// <param name="texOp">Texture operation to match.</param>
  /// <returns>The descriptor index, or -1 when nothing matches.</returns>
  public int FindImageDescriptorIndex(AstTextureOperation texOp)
  {
      TextureDescriptor[] images = GetImageDescriptors();

      return FindDescriptorIndex(images, texOp);
  }
  /// <summary>
  /// Creates the program information for the translated shader from the descriptors and usage
  /// state gathered in this configuration.
  /// </summary>
  /// <param name="identification">Shader identification; None by default.</param>
  /// <returns>The new program information.</returns>
  public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
  {
      // Descriptors are requested in this fixed order: constant buffers, storage buffers, textures, images.
      BufferDescriptor[] constantBuffers = GetConstantBufferDescriptors();
      BufferDescriptor[] storageBuffers = GetStorageBufferDescriptors();
      TextureDescriptor[] textures = GetTextureDescriptors();
      TextureDescriptor[] images = GetImageDescriptors();

      bool usesInstanceId = UsedFeatures.HasFlag(FeatureFlags.InstanceId);
      bool usesDrawParameters = UsedFeatures.HasFlag(FeatureFlags.DrawParameters);
      bool usesRtLayer = UsedFeatures.HasFlag(FeatureFlags.RtLayer);

      return new ShaderProgramInfo(
          constantBuffers,
          storageBuffers,
          textures,
          images,
          identification,
          GpLayerInputAttribute,
          Stage,
          usesInstanceId,
          usesDrawParameters,
          usesRtLayer,
          ClipDistancesWritten,
          OmapTargets);
  }
  760. }
  761. }