// ComputeClass.cs
  1. using Ryujinx.Graphics.Device;
  2. using Ryujinx.Graphics.GAL;
  3. using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
  4. using Ryujinx.Graphics.Gpu.Engine.Threed;
  5. using Ryujinx.Graphics.Gpu.Engine.Types;
  6. using Ryujinx.Graphics.Gpu.Image;
  7. using Ryujinx.Graphics.Gpu.Shader;
  8. using Ryujinx.Graphics.Shader;
  9. using System;
  10. using System.Collections.Generic;
  11. using System.Runtime.CompilerServices;
  12. namespace Ryujinx.Graphics.Gpu.Engine.Compute
  13. {
  /// <summary>
  /// Represents a compute engine class.
  /// </summary>
  class ComputeClass : IDeviceState
  {
      /// <summary>
      /// Byte offset, from the start of compute uniform buffer 0, of the storage buffer descriptor for slot 0.
      /// </summary>
      private const int StorageBufferDescriptorsOffset = 0x310;

      /// <summary>
      /// Byte offset, from the start of compute uniform buffer 0, of the descriptor for the first emulated uniform buffer.
      /// </summary>
      private const int EmulatedUniformBufferDescriptorsOffset = 0x260;

      /// <summary>
      /// Distance in bytes between two consecutive descriptors.
      /// </summary>
      private const int DescriptorStride = 0x10;

      /// <summary>
      /// First uniform buffer slot that is bound from a descriptor in memory instead of from the QMD.
      /// </summary>
      private const int FirstEmulatedUniformBufferSlot = 8;

      private readonly GpuContext _context;
      private readonly GpuChannel _channel;
      private readonly ThreedClass _3dEngine;
      private readonly DeviceState<ComputeClassState> _state;
      private readonly InlineToMemoryClass _i2mClass;

      /// <summary>
      /// Creates a new instance of the compute engine class.
      /// </summary>
      /// <param name="context">GPU context</param>
      /// <param name="channel">GPU channel</param>
      /// <param name="threedEngine">3D engine</param>
      public ComputeClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
      {
          _context = context;
          _channel = channel;
          _3dEngine = threedEngine;

          // Register the methods that trigger an action when written; all other registers are plain state.
          _state = new DeviceState<ComputeClassState>(new Dictionary<string, RwCallback>
          {
              { nameof(ComputeClassState.LaunchDma), new RwCallback(LaunchDma, null) },
              { nameof(ComputeClassState.LoadInlineData), new RwCallback(LoadInlineData, null) },
              { nameof(ComputeClassState.SendSignalingPcasB), new RwCallback(SendSignalingPcasB, null) }
          });

          _i2mClass = new InlineToMemoryClass(context, channel, initializeState: false);
      }

      /// <summary>
      /// Reads data from the class registers.
      /// </summary>
      /// <param name="offset">Register byte offset</param>
      /// <returns>Data at the specified offset</returns>
      public int Read(int offset) => _state.Read(offset);

      /// <summary>
      /// Writes data to the class registers.
      /// </summary>
      /// <param name="offset">Register byte offset</param>
      /// <param name="data">Data to be written</param>
      public void Write(int offset, int data) => _state.Write(offset, data);

      /// <summary>
      /// Launches the Inline-to-Memory DMA copy operation.
      /// </summary>
      /// <param name="argument">Method call argument</param>
      private void LaunchDma(int argument)
      {
          // The compute state is reinterpreted in place as Inline-to-Memory state.
          // NOTE(review): this assumes both state structures share the same leading register layout - confirm.
          _i2mClass.LaunchDma(ref Unsafe.As<ComputeClassState, InlineToMemoryClassState>(ref _state.State), argument);
      }

      /// <summary>
      /// Pushes a block of data to the Inline-to-Memory engine.
      /// </summary>
      /// <param name="data">Data to push</param>
      public void LoadInlineData(ReadOnlySpan<int> data)
      {
          _i2mClass.LoadInlineData(data);
      }

      /// <summary>
      /// Pushes a word of data to the Inline-to-Memory engine.
      /// </summary>
      /// <param name="argument">Method call argument</param>
      private void LoadInlineData(int argument)
      {
          _i2mClass.LoadInlineData(argument);
      }

      /// <summary>
      /// Performs the compute dispatch operation.
      /// </summary>
      /// <remarks>
      /// Reads the QMD addressed by the SendPcasA register, binds the compute shader, buffers,
      /// textures and images it requires, and then dispatches the compute work.
      /// </remarks>
      /// <param name="argument">Method call argument</param>
      private void SendSignalingPcasB(int argument)
      {
          var memoryManager = _channel.MemoryManager;

          // Since we're going to change the state, make sure any pending instanced draws are done.
          _3dEngine.PerformDeferredDraws();

          // Make sure all pending uniform buffer data is written to memory.
          _3dEngine.FlushUboDirty();

          uint qmdAddress = _state.State.SendPcasA;

          // The register value is shifted left by 8 bits to form the GPU virtual address of the QMD.
          var qmd = memoryManager.Read<ComputeQmd>((ulong)qmdAddress << 8);

          ulong shaderGpuVa = ((ulong)_state.State.SetProgramRegionAAddressUpper << 32) | _state.State.SetProgramRegionB;

          shaderGpuVa += (uint)qmd.ProgramOffset;

          int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;

          // Never request more shared memory than the host reports as available.
          int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);

          // Bind every constant buffer that the QMD flags as valid.
          for (int index = 0; index < Constants.TotalCpUniformBuffers; index++)
          {
              if (!qmd.ConstantBufferValid(index))
              {
                  continue;
              }

              ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32;
              ulong size = (ulong)qmd.ConstantBufferSize(index);

              _channel.BufferManager.SetComputeUniformBuffer(index, gpuVa, size);
          }

          ulong samplerPoolGpuVa = ((ulong)_state.State.SetTexSamplerPoolAOffsetUpper << 32) | _state.State.SetTexSamplerPoolB;
          ulong texturePoolGpuVa = ((ulong)_state.State.SetTexHeaderPoolAOffsetUpper << 32) | _state.State.SetTexHeaderPoolB;

          GpuChannelPoolState poolState = new GpuChannelPoolState(
              texturePoolGpuVa,
              _state.State.SetTexHeaderPoolCMaximumIndex,
              _state.State.SetBindlessTextureConstantBufferSlotSelect);

          GpuChannelComputeState computeState = new GpuChannelComputeState(
              qmd.CtaThreadDimension0,
              qmd.CtaThreadDimension1,
              qmd.CtaThreadDimension2,
              localMemorySize,
              sharedMemorySize,
              _channel.BufferManager.HasUnalignedStorageBuffers);

          CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);

          _context.Renderer.Pipeline.SetProgram(cs.HostProgram);

          _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, _state.State.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex);
          _channel.TextureManager.SetComputeTexturePool(texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex);
          _channel.TextureManager.SetComputeTextureBufferIndex(_state.State.SetBindlessTextureConstantBufferSlotSelect);

          ShaderProgramInfo info = cs.Shaders[0].Info;

          // Remember the alignment flag the shader above was fetched with, so a change can be detected below.
          bool hasUnaligned = _channel.BufferManager.HasUnalignedStorageBuffers;

          // Storage buffer descriptors are read from compute uniform buffer 0.
          for (int index = 0; index < info.SBuffers.Count; index++)
          {
              BufferDescriptor sb = info.SBuffers[index];

              ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);

              int sbDescOffset = StorageBufferDescriptorsOffset + sb.Slot * DescriptorStride;

              sbDescAddress += (ulong)sbDescOffset;

              SbDescriptor sbDescriptor = memoryManager.Physical.Read<SbDescriptor>(sbDescAddress);

              _channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
          }

          if (_channel.BufferManager.HasUnalignedStorageBuffers != hasUnaligned)
          {
              // Refetch the shader, as assumptions about storage buffer alignment have changed.
              // The compute state captured the old flag by value, so it has to be rebuilt first;
              // otherwise the refetch would be performed with the stale alignment assumption.
              computeState = new GpuChannelComputeState(
                  qmd.CtaThreadDimension0,
                  qmd.CtaThreadDimension1,
                  qmd.CtaThreadDimension2,
                  localMemorySize,
                  sharedMemorySize,
                  _channel.BufferManager.HasUnalignedStorageBuffers);

              cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);

              _context.Renderer.Pipeline.SetProgram(cs.HostProgram);

              info = cs.Shaders[0].Info;
          }

          for (int index = 0; index < info.CBuffers.Count; index++)
          {
              BufferDescriptor cb = info.CBuffers[index];

              // NVN uses the "hardware" constant buffer for anything that is less than 8,
              // and those are already bound above.
              // Anything greater than or equal to 8 uses the emulated constant buffers.
              // They are emulated using global memory loads.
              if (cb.Slot < FirstEmulatedUniformBufferSlot)
              {
                  continue;
              }

              ulong cbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);

              int cbDescOffset = EmulatedUniformBufferDescriptorsOffset + (cb.Slot - FirstEmulatedUniformBufferSlot) * DescriptorStride;

              cbDescAddress += (ulong)cbDescOffset;

              SbDescriptor cbDescriptor = memoryManager.Physical.Read<SbDescriptor>(cbDescAddress);

              _channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
          }

          _channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers);
          _channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers);

          // Highest binding index seen for each kind; stays at -1 when the shader uses none.
          int maxTextureBinding = -1;
          int maxImageBinding = -1;

          TextureBindingInfo[] textureBindings = _channel.TextureManager.RentComputeTextureBindings(info.Textures.Count);

          for (int index = 0; index < info.Textures.Count; index++)
          {
              var descriptor = info.Textures[index];

              Target target = ShaderTexture.GetTarget(descriptor.Type);

              textureBindings[index] = new TextureBindingInfo(
                  target,
                  descriptor.Binding,
                  descriptor.CbufSlot,
                  descriptor.HandleIndex,
                  descriptor.Flags);

              if (descriptor.Binding > maxTextureBinding)
              {
                  maxTextureBinding = descriptor.Binding;
              }
          }

          TextureBindingInfo[] imageBindings = _channel.TextureManager.RentComputeImageBindings(info.Images.Count);

          for (int index = 0; index < info.Images.Count; index++)
          {
              var descriptor = info.Images[index];

              Target target = ShaderTexture.GetTarget(descriptor.Type);
              Format format = ShaderTexture.GetFormat(descriptor.Format);

              imageBindings[index] = new TextureBindingInfo(
                  target,
                  format,
                  descriptor.Binding,
                  descriptor.CbufSlot,
                  descriptor.HandleIndex,
                  descriptor.Flags);

              if (descriptor.Binding > maxImageBinding)
              {
                  maxImageBinding = descriptor.Binding;
              }
          }

          _channel.TextureManager.SetComputeMaxBindings(maxTextureBinding, maxImageBinding);

          // Should never return false for mismatching spec state, since the shader was fetched above.
          _channel.TextureManager.CommitComputeBindings(cs.SpecializationState);

          _channel.BufferManager.CommitComputeBindings();

          _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth);

          _3dEngine.ForceShaderUpdate();
      }
  }
  205. }