BarrierBatch.cs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. using Silk.NET.Vulkan;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Runtime.CompilerServices;
  5. namespace Ryujinx.Graphics.Vulkan
  6. {
  7. internal class BarrierBatch : IDisposable
  8. {
        // Maximum number of barriers submitted per vkCmdPipelineBarrier call,
        // matching the capacity of the native staging arrays below.
        private const int MaxBarriersPerCall = 16;

        // Access flags always present on incoherent (storage write) memory barriers.
        private const AccessFlags BaseAccess = AccessFlags.ShaderReadBit | AccessFlags.ShaderWriteBit;

        // Additional access flags covered when the barrier must also protect
        // buffer usages (index/vertex attribute/uniform reads).
        private const AccessFlags BufferAccess = AccessFlags.IndexReadBit | AccessFlags.VertexAttributeReadBit | AccessFlags.UniformReadBit;

        // Access flags for indirect draw/dispatch parameter reads.
        private const AccessFlags CommandBufferAccess = AccessFlags.IndirectCommandReadBit;

        private readonly VulkanRenderer _gd;

        // Fixed-size native arrays handed directly to vkCmdPipelineBarrier.
        private readonly NativeArray<MemoryBarrier> _memoryBarrierBatch = new(MaxBarriersPerCall);
        private readonly NativeArray<BufferMemoryBarrier> _bufferBarrierBatch = new(MaxBarriersPerCall);
        private readonly NativeArray<ImageMemoryBarrier> _imageBarrierBatch = new(MaxBarriersPerCall);

        // Queued barriers tagged with their source/dest stage flags, so that barriers
        // sharing identical flags can be coalesced into a single submission.
        // The second type argument is the associated resource; int is a placeholder
        // for barrier kinds that have none.
        private readonly List<BarrierWithStageFlags<MemoryBarrier, int>> _memoryBarriers = new();
        private readonly List<BarrierWithStageFlags<BufferMemoryBarrier, int>> _bufferBarriers = new();
        private readonly List<BarrierWithStageFlags<ImageMemoryBarrier, TextureStorage>> _imageBarriers = new();

        // Total number of barriers currently queued across all three lists.
        private int _queuedBarrierCount;

        // Strength of a pending incoherent barrier; values are ordered so that a
        // stronger request (compared with >) supersedes a weaker one.
        private enum IncoherentBarrierType
        {
            None,
            Texture,
            All,
            CommandBuffer
        }

        private bool _feedbackLoopActive;

        // Pipeline stages that may have performed incoherent writes since the last
        // emitted barrier (buffers and textures tracked separately).
        private PipelineStageFlags _incoherentBufferWriteStages;
        private PipelineStageFlags _incoherentTextureWriteStages;

        // Extra stages merged into buffer write tracking (e.g. transform feedback).
        private PipelineStageFlags _extraStages;

        private IncoherentBarrierType _queuedIncoherentBarrier;
        private bool _queuedFeedbackLoopBarrier;
  34. public BarrierBatch(VulkanRenderer gd)
  35. {
  36. _gd = gd;
  37. }
  38. public static (AccessFlags Access, PipelineStageFlags Stages) GetSubpassAccessSuperset(VulkanRenderer gd)
  39. {
  40. AccessFlags access = BufferAccess;
  41. PipelineStageFlags stages = PipelineStageFlags.AllGraphicsBit;
  42. if (gd.TransformFeedbackApi != null)
  43. {
  44. access |= AccessFlags.TransformFeedbackWriteBitExt;
  45. stages |= PipelineStageFlags.TransformFeedbackBitExt;
  46. }
  47. return (access, stages);
  48. }
  49. private readonly record struct StageFlags : IEquatable<StageFlags>
  50. {
  51. public readonly PipelineStageFlags Source;
  52. public readonly PipelineStageFlags Dest;
  53. public StageFlags(PipelineStageFlags source, PipelineStageFlags dest)
  54. {
  55. Source = source;
  56. Dest = dest;
  57. }
  58. }
  59. private readonly struct BarrierWithStageFlags<T, T2> where T : unmanaged
  60. {
  61. public readonly StageFlags Flags;
  62. public readonly T Barrier;
  63. public readonly T2 Resource;
  64. public BarrierWithStageFlags(StageFlags flags, T barrier)
  65. {
  66. Flags = flags;
  67. Barrier = barrier;
  68. Resource = default;
  69. }
  70. public BarrierWithStageFlags(PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags, T barrier, T2 resource)
  71. {
  72. Flags = new StageFlags(srcStageFlags, dstStageFlags);
  73. Barrier = barrier;
  74. Resource = resource;
  75. }
  76. }
  77. private void QueueBarrier<T, T2>(List<BarrierWithStageFlags<T, T2>> list, T barrier, T2 resource, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags) where T : unmanaged
  78. {
  79. list.Add(new BarrierWithStageFlags<T, T2>(srcStageFlags, dstStageFlags, barrier, resource));
  80. _queuedBarrierCount++;
  81. }
  82. public void QueueBarrier(MemoryBarrier barrier, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags)
  83. {
  84. QueueBarrier(_memoryBarriers, barrier, default, srcStageFlags, dstStageFlags);
  85. }
  86. public void QueueBarrier(BufferMemoryBarrier barrier, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags)
  87. {
  88. QueueBarrier(_bufferBarriers, barrier, default, srcStageFlags, dstStageFlags);
  89. }
  90. public void QueueBarrier(ImageMemoryBarrier barrier, TextureStorage resource, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags)
  91. {
  92. QueueBarrier(_imageBarriers, barrier, resource, srcStageFlags, dstStageFlags);
  93. }
        /// <summary>
        /// Queues any pending incoherent (storage write) or feedback loop memory barrier,
        /// based on the write stages tracked since the last emitted barrier.
        /// </summary>
        /// <param name="program">Currently bound program, used to restart write-stage tracking; may be null</param>
        /// <param name="inRenderPass">True when a render pass is active, restricting stages to graphics</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public unsafe void FlushMemoryBarrier(ShaderCollection program, bool inRenderPass)
        {
            if (_queuedIncoherentBarrier > IncoherentBarrierType.None)
            {
                // We should emit a memory barrier if there's a write access in the program (current program, or program since last barrier)
                bool hasTextureWrite = _incoherentTextureWriteStages != PipelineStageFlags.None;
                bool hasBufferWrite = _incoherentBufferWriteStages != PipelineStageFlags.None;

                // Texture-only barriers ignore buffer writes entirely.
                bool hasBufferBarrier = _queuedIncoherentBarrier > IncoherentBarrierType.Texture;

                if (hasTextureWrite || (hasBufferBarrier && hasBufferWrite))
                {
                    AccessFlags access = BaseAccess;

                    // Inside a render pass only graphics stages may appear in the barrier.
                    PipelineStageFlags stages = inRenderPass ? PipelineStageFlags.AllGraphicsBit : PipelineStageFlags.AllCommandsBit;

                    if (hasBufferBarrier && hasBufferWrite)
                    {
                        access |= BufferAccess;

                        if (_gd.TransformFeedbackApi != null)
                        {
                            access |= AccessFlags.TransformFeedbackWriteBitExt;
                            stages |= PipelineStageFlags.TransformFeedbackBitExt;
                        }
                    }

                    // The strongest barrier type also protects indirect command reads.
                    if (_queuedIncoherentBarrier == IncoherentBarrierType.CommandBuffer)
                    {
                        access |= CommandBufferAccess;
                        stages |= PipelineStageFlags.DrawIndirectBit;
                    }

                    MemoryBarrier barrier = new MemoryBarrier()
                    {
                        SType = StructureType.MemoryBarrier,
                        SrcAccessMask = access,
                        DstAccessMask = access
                    };

                    QueueBarrier(barrier, stages, stages);

                    // Restart tracking from the current program's writes only.
                    _incoherentTextureWriteStages = program?.IncoherentTextureWriteStages ?? PipelineStageFlags.None;

                    // Buffer tracking is only reset when the barrier covered buffers.
                    if (_queuedIncoherentBarrier > IncoherentBarrierType.Texture)
                    {
                        if (program != null)
                        {
                            _incoherentBufferWriteStages = program.IncoherentBufferWriteStages | _extraStages;
                        }
                        else
                        {
                            _incoherentBufferWriteStages = PipelineStageFlags.None;
                        }
                    }

                    _queuedIncoherentBarrier = IncoherentBarrierType.None;
                    _queuedFeedbackLoopBarrier = false;
                }
                else if (_feedbackLoopActive && _queuedFeedbackLoopBarrier)
                {
                    // Feedback loop barrier: fragment shader writes must be visible
                    // to subsequent graphics-stage reads.
                    MemoryBarrier barrier = new MemoryBarrier()
                    {
                        SType = StructureType.MemoryBarrier,
                        SrcAccessMask = AccessFlags.ShaderWriteBit,
                        DstAccessMask = AccessFlags.ShaderReadBit
                    };

                    QueueBarrier(barrier, PipelineStageFlags.FragmentShaderBit, PipelineStageFlags.AllGraphicsBit);

                    _queuedFeedbackLoopBarrier = false;
                }

                _feedbackLoopActive = false;
            }
        }
  158. public unsafe void Flush(CommandBufferScoped cbs, bool inRenderPass, RenderPassHolder rpHolder, Action endRenderPass)
  159. {
  160. Flush(cbs, null, false, inRenderPass, rpHolder, endRenderPass);
  161. }
        /// <summary>
        /// Flushes all queued barriers into the given command buffer, coalescing barriers
        /// that share stage flags and ending the render pass when a queued barrier cannot
        /// legally or efficiently execute inside it.
        /// </summary>
        /// <param name="cbs">Command buffer to record barriers into</param>
        /// <param name="program">Currently bound program, or null; its incoherent write stages are accumulated</param>
        /// <param name="feedbackLoopActive">True if a texture feedback loop is currently active</param>
        /// <param name="inRenderPass">True if a render pass is currently active</param>
        /// <param name="rpHolder">Render pass holder used for forced fences and attachment checks</param>
        /// <param name="endRenderPass">Callback that ends the current render pass</param>
        public unsafe void Flush(CommandBufferScoped cbs, ShaderCollection program, bool feedbackLoopActive, bool inRenderPass, RenderPassHolder rpHolder, Action endRenderPass)
        {
            if (program != null)
            {
                // Accumulate the bound program's incoherent write stages.
                _incoherentBufferWriteStages |= program.IncoherentBufferWriteStages | _extraStages;
                _incoherentTextureWriteStages |= program.IncoherentTextureWriteStages;
            }

            _feedbackLoopActive |= feedbackLoopActive;

            FlushMemoryBarrier(program, inRenderPass);

            if (!inRenderPass && rpHolder != null)
            {
                // Render pass is about to begin. Queue any fences that normally interrupt the pass.
                rpHolder.InsertForcedFences(cbs);
            }

            // Each iteration submits one batch of barriers that share stage flags;
            // loop until every queued barrier has been consumed.
            while (_queuedBarrierCount > 0)
            {
                int memoryCount = 0;
                int bufferCount = 0;
                int imageCount = 0;

                bool hasBarrier = false;
                StageFlags flags = default;

                // Copies queued barriers whose flags match the current batch into the
                // native target array, removing consumed entries from the list. Stops
                // early when the target array is full; leftovers are handled by the
                // next outer-loop iteration.
                static void AddBarriers<T, T2>(
                    Span<T> target,
                    ref int queuedBarrierCount,
                    ref bool hasBarrier,
                    ref StageFlags flags,
                    ref int count,
                    List<BarrierWithStageFlags<T, T2>> list) where T : unmanaged
                {
                    int firstMatch = -1;
                    int end = list.Count;

                    for (int i = 0; i < list.Count; i++)
                    {
                        BarrierWithStageFlags<T, T2> barrier = list[i];

                        if (!hasBarrier)
                        {
                            // First barrier encountered fixes the flags for this batch.
                            flags = barrier.Flags;
                            hasBarrier = true;

                            target[count++] = barrier.Barrier;
                            queuedBarrierCount--;
                            firstMatch = i;

                            if (count >= target.Length)
                            {
                                end = i + 1;
                                break;
                            }
                        }
                        else
                        {
                            if (flags.Equals(barrier.Flags))
                            {
                                target[count++] = barrier.Barrier;
                                queuedBarrierCount--;

                                if (firstMatch == -1)
                                {
                                    firstMatch = i;
                                }

                                if (count >= target.Length)
                                {
                                    end = i + 1;
                                    break;
                                }
                            }
                            else
                            {
                                // Delete consumed barriers from the first match to the current non-match.
                                if (firstMatch != -1)
                                {
                                    int deleteCount = i - firstMatch;
                                    list.RemoveRange(firstMatch, deleteCount);
                                    i -= deleteCount;

                                    firstMatch = -1;
                                    end = list.Count;
                                }
                            }
                        }
                    }

                    // Remove the final run of consumed entries.
                    if (firstMatch == 0 && end == list.Count)
                    {
                        list.Clear();
                    }
                    else if (firstMatch != -1)
                    {
                        int deleteCount = end - firstMatch;

                        list.RemoveRange(firstMatch, deleteCount);
                    }
                }

                if (inRenderPass && _imageBarriers.Count > 0)
                {
                    // Image barriers queued in the batch are meant to be globally scoped,
                    // but inside a render pass they're scoped to just the range of the render pass.
                    // On MoltenVK, we just break the rules and always use image barrier.
                    // On desktop GPUs, all barriers are globally scoped, so we just replace it with a generic memory barrier.
                    // Generally, we want to avoid this from happening in the future, so flag the texture to immediately
                    // emit a barrier whenever the current render pass is bound again.
                    bool anyIsNonAttachment = false;

                    foreach (BarrierWithStageFlags<ImageMemoryBarrier, TextureStorage> barrier in _imageBarriers)
                    {
                        // If the binding is an attachment, don't add it as a forced fence.
                        bool isAttachment = rpHolder.ContainsAttachment(barrier.Resource);

                        if (!isAttachment)
                        {
                            rpHolder.AddForcedFence(barrier.Resource, barrier.Flags.Dest);
                            anyIsNonAttachment = true;
                        }
                    }

                    if (_gd.IsTBDR)
                    {
                        if (!_gd.IsMoltenVk)
                        {
                            if (!anyIsNonAttachment)
                            {
                                // This case is a feedback loop. To prevent this from causing an absolute performance disaster,
                                // remove the barriers entirely.
                                // If this is not here, there will be a lot of single draw render passes.
                                // TODO: explicit handling for feedback loops, likely outside this class.
                                _queuedBarrierCount -= _imageBarriers.Count;
                                _imageBarriers.Clear();
                            }
                            else
                            {
                                // TBDR GPUs are sensitive to barriers, so we need to end the pass to ensure the data is available.
                                // Metal already has hazard tracking so MVK doesn't need this.
                                endRenderPass();
                                inRenderPass = false;
                            }
                        }
                    }
                    else
                    {
                        // Generic pipeline memory barriers will work for desktop GPUs.
                        // They do require a few more access flags on the subpass dependency, though.
                        foreach (var barrier in _imageBarriers)
                        {
                            _memoryBarriers.Add(new BarrierWithStageFlags<MemoryBarrier, int>(
                                barrier.Flags,
                                new MemoryBarrier()
                                {
                                    SType = StructureType.MemoryBarrier,
                                    SrcAccessMask = barrier.Barrier.SrcAccessMask,
                                    DstAccessMask = barrier.Barrier.DstAccessMask
                                }));
                        }

                        _imageBarriers.Clear();
                    }
                }

                if (inRenderPass && _memoryBarriers.Count > 0)
                {
                    // End the pass if any queued memory barrier needs destination stages
                    // that the current subpass self-dependency cannot express.
                    PipelineStageFlags allFlags = PipelineStageFlags.None;

                    foreach (var barrier in _memoryBarriers)
                    {
                        allFlags |= barrier.Flags.Dest;
                    }

                    if (allFlags.HasFlag(PipelineStageFlags.DrawIndirectBit) || !_gd.SupportsRenderPassBarrier(allFlags))
                    {
                        endRenderPass();
                        inRenderPass = false;
                    }
                }

                AddBarriers(_memoryBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref memoryCount, _memoryBarriers);
                AddBarriers(_bufferBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref bufferCount, _bufferBarriers);
                AddBarriers(_imageBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref imageCount, _imageBarriers);

                if (hasBarrier)
                {
                    PipelineStageFlags srcStageFlags = flags.Source;

                    if (inRenderPass)
                    {
                        // Inside a render pass, barrier stages can only be from rasterization.
                        srcStageFlags &= ~PipelineStageFlags.ComputeShaderBit;
                    }

                    _gd.Api.CmdPipelineBarrier(
                        cbs.CommandBuffer,
                        srcStageFlags,
                        flags.Dest,
                        0,
                        (uint)memoryCount,
                        _memoryBarrierBatch.Pointer,
                        (uint)bufferCount,
                        _bufferBarrierBatch.Pointer,
                        (uint)imageCount,
                        _imageBarrierBatch.Pointer);
                }
            }
        }
  346. private void QueueIncoherentBarrier(IncoherentBarrierType type)
  347. {
  348. if (type > _queuedIncoherentBarrier)
  349. {
  350. _queuedIncoherentBarrier = type;
  351. }
  352. _queuedFeedbackLoopBarrier = true;
  353. }
  354. public void QueueTextureBarrier()
  355. {
  356. QueueIncoherentBarrier(IncoherentBarrierType.Texture);
  357. }
  358. public void QueueMemoryBarrier()
  359. {
  360. QueueIncoherentBarrier(IncoherentBarrierType.All);
  361. }
  362. public void QueueCommandBufferBarrier()
  363. {
  364. QueueIncoherentBarrier(IncoherentBarrierType.CommandBuffer);
  365. }
  366. public void EnableTfbBarriers(bool enable)
  367. {
  368. if (enable)
  369. {
  370. _extraStages |= PipelineStageFlags.TransformFeedbackBitExt;
  371. }
  372. else
  373. {
  374. _extraStages &= ~PipelineStageFlags.TransformFeedbackBitExt;
  375. }
  376. }
        /// <summary>
        /// Releases the native staging arrays used for barrier submission.
        /// </summary>
        public void Dispose()
        {
            _memoryBarrierBatch.Dispose();
            _bufferBarrierBatch.Dispose();
            _imageBarrierBatch.Dispose();
        }
  383. }
  384. }