// InlineToMemoryClass.cs (9.5 KB)
  1. using Ryujinx.Common;
  2. using Ryujinx.Common.Memory;
  3. using Ryujinx.Graphics.Device;
  4. using Ryujinx.Graphics.Texture;
  5. using System;
  6. using System.Collections.Generic;
  7. using System.Runtime.InteropServices;
  8. using System.Runtime.Intrinsics;
  namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
  {
      /// <summary>
      /// Represents an Inline-to-Memory engine class.
      /// </summary>
      /// <remarks>
      /// A transfer is set up by <see cref="LaunchDma(ref InlineToMemoryClassState, int)"/>, after which the
      /// payload is pushed through the LoadInlineData overloads. Once enough words have been received,
      /// the staged data is written to the destination in one go.
      /// </remarks>
      class InlineToMemoryClass : IDeviceState
      {
          private readonly GpuContext _context;
          private readonly GpuChannel _channel;

          // Only created when the constructor is asked to initialize the state; null otherwise.
          private readonly DeviceState<InlineToMemoryClassState> _state;

          // Parameters of the current transfer, captured from the class state by LaunchDma.
          private bool _isLinear;
          private int _offset; // Number of 32-bit words staged in _buffer so far.
          private int _size; // Transfer size in bytes: line length aligned up to 4, times the line count.
          private ulong _dstGpuVa;
          private int _dstX;
          private int _dstY;
          private int _dstWidth;
          private int _dstHeight;
          private int _dstStride;
          private int _dstGobBlocksInY;
          private int _dstGobBlocksInZ;
          private int _lineLengthIn;
          private int _lineCount;

          private bool _finished;

          // Staging buffer for the pushed words. It only ever grows and is reused between transfers.
          private int[] _buffer;

          /// <summary>
          /// Creates a new instance of the Inline-to-Memory engine class.
          /// </summary>
          /// <param name="context">GPU context</param>
          /// <param name="channel">GPU channel</param>
          /// <param name="initializeState">Indicates if the internal state should be initialized. Set to false if part of another engine</param>
          public InlineToMemoryClass(GpuContext context, GpuChannel channel, bool initializeState)
          {
              _context = context;
              _channel = channel;

              if (initializeState)
              {
                  _state = new DeviceState<InlineToMemoryClassState>(new Dictionary<string, RwCallback>
                  {
                      { nameof(InlineToMemoryClassState.LaunchDma), new RwCallback(LaunchDma, null) },
                      { nameof(InlineToMemoryClassState.LoadInlineData), new RwCallback(LoadInlineData, null) },
                  });
              }
          }

          /// <summary>
          /// Creates a new instance of the Inline-to-Memory engine class with its own register state.
          /// </summary>
          /// <param name="context">GPU context</param>
          /// <param name="channel">GPU channel</param>
          public InlineToMemoryClass(GpuContext context, GpuChannel channel) : this(context, channel, true)
          {
          }

          /// <summary>
          /// Reads data from the class registers.
          /// </summary>
          /// <param name="offset">Register byte offset</param>
          /// <returns>Data at the specified offset</returns>
          public int Read(int offset) => _state.Read(offset);

          /// <summary>
          /// Writes data to the class registers.
          /// </summary>
          /// <param name="offset">Register byte offset</param>
          /// <param name="data">Data to be written</param>
          public void Write(int offset, int data) => _state.Write(offset, data);

          /// <summary>
          /// Launches Inline-to-Memory engine DMA copy using the internal class state.
          /// </summary>
          /// <param name="argument">Method call argument</param>
          private void LaunchDma(int argument)
          {
              LaunchDma(ref _state.State, argument);
          }

          /// <summary>
          /// Launches Inline-to-Memory engine DMA copy.
          /// </summary>
          /// <remarks>
          /// This only records the transfer parameters and resets the staging position; the data itself
          /// arrives afterwards through the LoadInlineData overloads.
          /// </remarks>
          /// <param name="state">Current class state</param>
          /// <param name="argument">Method call argument</param>
          public void LaunchDma(ref InlineToMemoryClassState state, int argument)
          {
              // Bit 0 of the argument selects the linear destination layout.
              _isLinear = (argument & 1) != 0;

              _offset = 0;

              // NOTE(review): the product is computed as uint and can wrap for very large
              // line length/count values before the cast; left as-is, confirm whether clamping is wanted.
              _size = (int)(BitUtils.AlignUp<uint>(state.LineLengthIn, 4) * state.LineCount);

              int count = _size / 4;

              if (_buffer == null || _buffer.Length < count)
              {
                  _buffer = new int[count];
              }

              ulong dstGpuVa = ((ulong)state.OffsetOutUpperValue << 32) | state.OffsetOut;

              _dstGpuVa = dstGpuVa;
              _dstX = state.SetDstOriginBytesXV;
              _dstY = state.SetDstOriginSamplesYV;
              _dstWidth = (int)state.SetDstWidth;
              _dstHeight = (int)state.SetDstHeight;
              _dstStride = (int)state.PitchOut;
              _dstGobBlocksInY = 1 << (int)state.SetDstBlockSizeHeight;
              _dstGobBlocksInZ = 1 << (int)state.SetDstBlockSizeDepth;
              _lineLengthIn = (int)state.LineLengthIn;
              _lineCount = (int)state.LineCount;

              _finished = false;
          }

          /// <summary>
          /// Pushes a block of data to the Inline-to-Memory engine.
          /// </summary>
          /// <remarks>
          /// Data is ignored when no transfer is in progress. Words beyond the staging buffer capacity are dropped.
          /// </remarks>
          /// <param name="data">Data to push</param>
          public void LoadInlineData(ReadOnlySpan<int> data)
          {
              // Nothing to do if the transfer already completed, or if LaunchDma was never called
              // (in which case there is no staging buffer yet).
              if (_finished || _buffer == null)
              {
                  return;
              }

              int copySize = Math.Min(data.Length, _buffer.Length - _offset);

              data[..copySize].CopyTo(new Span<int>(_buffer).Slice(_offset, copySize));

              _offset += copySize;

              if (_offset * 4 >= _size)
              {
                  FinishTransfer();
              }
          }

          /// <summary>
          /// Pushes a word of data to the Inline-to-Memory engine.
          /// </summary>
          /// <remarks>
          /// Data is ignored when no transfer is in progress. A word that does not fit in the staging buffer is dropped.
          /// </remarks>
          /// <param name="argument">Method call argument</param>
          public void LoadInlineData(int argument)
          {
              // Nothing to do if the transfer already completed, or if LaunchDma was never called
              // (in which case there is no staging buffer yet).
              if (_finished || _buffer == null)
              {
                  return;
              }

              // Same clamping as the span overload: a zero-sized transfer may have a zero-length
              // buffer, so the word is only stored when there is room for it.
              if (_offset < _buffer.Length)
              {
                  _buffer[_offset++] = argument;
              }

              if (_offset * 4 >= _size)
              {
                  FinishTransfer();
              }
          }

          /// <summary>
          /// Performs actual copy of the inline data after the transfer is finished.
          /// </summary>
          private void FinishTransfer()
          {
              // Mark the transfer as complete up front so that every exit path below, including the
              // early return taken when a cached texture is updated, stops further data from being accepted.
              _finished = true;

              var memoryManager = _channel.MemoryManager;

              var data = MemoryMarshal.Cast<int, byte>(_buffer)[.._size];

              if (_isLinear && _lineCount == 1)
              {
                  // Single linear line: one contiguous write, without the 4-byte padding at the end.
                  memoryManager.WriteTrackedResource(_dstGpuVa, data[.._lineLengthIn]);
                  _context.AdvanceSequence();

                  return;
              }

              // TODO: Verify if the destination X/Y and width/height are taken into account
              // for linear texture transfers. If not, we can use the fast path for that as well.
              // Right now the copy code at the bottom assumes that it is used on both which might be incorrect.
              if (!_isLinear)
              {
                  var target = memoryManager.Physical.TextureCache.FindTexture(
                      memoryManager,
                      _dstGpuVa,
                      1,
                      _dstStride,
                      _dstHeight,
                      _lineLengthIn,
                      _lineCount,
                      _isLinear,
                      _dstGobBlocksInY,
                      _dstGobBlocksInZ);

                  if (target != null)
                  {
                      // A matching texture exists in the cache: hand the data to it instead of
                      // writing it through the memory manager.
                      // NOTE(review): this path does not call AdvanceSequence; confirm that is intended.
                      target.SynchronizeMemory();
                      var dataCopy = MemoryOwner<byte>.RentCopy(data);
                      target.SetData(dataCopy, 0, 0, new GAL.Rectangle<int>(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount));
                      target.SignalModified();

                      return;
                  }
              }

              // NOTE(review): the final argument is presumably the bytes per pixel (X is in bytes here) - confirm against OffsetCalculator.
              var dstCalculator = new OffsetCalculator(
                  _dstWidth,
                  _dstHeight,
                  _dstStride,
                  _isLinear,
                  _dstGobBlocksInY,
                  1);

              int srcOffset = 0;

              for (int y = _dstY; y < _dstY + _lineCount; y++)
              {
                  int x1 = _dstX;
                  int x2 = _dstX + _lineLengthIn;
                  int x1Round = BitUtils.AlignUp(_dstX, 16);
                  int x2Trunc = BitUtils.AlignDown(x2, 16);

                  int x = x1;

                  // Leading bytes, one at a time, up to the first 16-byte aligned X position.
                  // Skipped when the whole line ends before that position; the trailing loop handles it then.
                  if (x1Round <= x2)
                  {
                      for (; x < x1Round; x++, srcOffset++)
                      {
                          int dstOffset = dstCalculator.GetOffset(x, y);

                          ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                          memoryManager.Write(dstAddress, data[srcOffset]);
                      }
                  }

                  // Aligned middle part, 16 bytes per write.
                  for (; x < x2Trunc; x += 16, srcOffset += 16)
                  {
                      int dstOffset = dstCalculator.GetOffset(x, y);

                      ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                      memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
                  }

                  // Trailing bytes, one at a time.
                  for (; x < x2; x++, srcOffset++)
                  {
                      int dstOffset = dstCalculator.GetOffset(x, y);

                      ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                      memoryManager.Write(dstAddress, data[srcOffset]);
                  }

                  // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
                  // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
                  int misalignment = _lineLengthIn & 3;

                  if (misalignment != 0)
                  {
                      srcOffset += 4 - misalignment;
                  }
              }

              _context.AdvanceSequence();
          }
      }
  }