Prechádzať zdrojové kódy

Support non-contiguous copies on I2M and DMA engines (#2473)

* Support non-contiguous copies on I2M and DMA engines

* Vector copy should start aligned on I2M

* Nits

* Zero-extend the offset
gdkchan pred 4 rokmi
rodič
commit
ff5df5d8a1

+ 4 - 7
Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs

@@ -152,14 +152,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
                     dst.MemoryLayout.UnpackGobBlocksInZ(),
                     dstBpp);
 
-                ulong srcBaseAddress = memoryManager.Translate(srcGpuVa);
-                ulong dstBaseAddress = memoryManager.Translate(dstGpuVa);
-
                 (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, xCount, yCount);
                 (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, xCount, yCount);
 
-                ReadOnlySpan<byte> srcSpan = memoryManager.Physical.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize, true);
-                Span<byte> dstSpan = memoryManager.Physical.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray();
+                ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (uint)srcBaseOffset, srcSize, true);
+                Span<byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (uint)dstBaseOffset, dstSize).ToArray();
 
                 bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
                 bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);
@@ -217,7 +214,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
                     {
                         srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.
 
-                        memoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
+                        memoryManager.Write(dstGpuVa + (uint)dstBaseOffset, dstSpan);
 
                         return;
                     }
@@ -258,7 +255,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
                     _ => throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format.")
                 };
 
-                memoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
+                memoryManager.Write(dstGpuVa + (uint)dstBaseOffset, dstSpan);
             }
             else
             {

+ 25 - 17
Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs

@@ -4,6 +4,7 @@ using Ryujinx.Graphics.Texture;
 using System;
 using System.Collections.Generic;
 using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
 {
@@ -169,13 +170,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
         /// </summary>
         private void FinishTransfer()
         {
-            Span<byte> data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);
+            var memoryManager = _channel.MemoryManager;
+
+            var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);
 
             if (_isLinear && _lineCount == 1)
             {
-                ulong address = _channel.MemoryManager.Translate(_dstGpuVa);
-
-                _channel.MemoryManager.Physical.Write(address, data);
+                memoryManager.Write(_dstGpuVa, data);
             }
             else
             {
@@ -189,36 +190,43 @@ namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
 
                 int srcOffset = 0;
 
-                ulong dstBaseAddress = _channel.MemoryManager.Translate(_dstGpuVa);
-
                 for (int y = _dstY; y < _dstY + _lineCount; y++)
                 {
                     int x1 = _dstX;
                     int x2 = _dstX + _lineLengthIn;
-                    int x2Trunc = _dstX + BitUtils.AlignDown(_lineLengthIn, 16);
+                    int x1Round = BitUtils.AlignUp(_dstX, 16);
+                    int x2Trunc = BitUtils.AlignDown(x2, 16);
 
-                    int x;
+                    int x = x1;
 
-                    for (x = x1; x < x2Trunc; x += 16, srcOffset += 16)
+                    if (x1Round <= x2)
                     {
-                        int dstOffset = dstCalculator.GetOffset(x, y);
+                        for (; x < x1Round; x++, srcOffset++)
+                        {
+                            int dstOffset = dstCalculator.GetOffset(x, y);
+
+                            ulong dstAddress = _dstGpuVa + (uint)dstOffset;
 
-                        ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
+                            memoryManager.Write(dstAddress, data[srcOffset]);
+                        }
+                    }
 
-                        Span<byte> pixel = data.Slice(srcOffset, 16);
+                    for (; x < x2Trunc; x += 16, srcOffset += 16)
+                    {
+                        int dstOffset = dstCalculator.GetOffset(x, y);
 
-                        _channel.MemoryManager.Physical.Write(dstAddress, pixel);
+                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;
+
+                        memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
                     }
 
                     for (; x < x2; x++, srcOffset++)
                     {
                         int dstOffset = dstCalculator.GetOffset(x, y);
 
-                        ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
-
-                        Span<byte> pixel = data.Slice(srcOffset, 1);
+                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;
 
-                        _channel.MemoryManager.Physical.Write(dstAddress, pixel);
+                        memoryManager.Write(dstAddress, data[srcOffset]);
                     }
                 }
             }