FormatConverter.cs 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. using Ryujinx.Common.Memory;
  2. using System;
  3. using System.Numerics;
  4. using System.Runtime.InteropServices;
  5. using System.Runtime.Intrinsics;
  6. using System.Runtime.Intrinsics.X86;
  7. namespace Ryujinx.Graphics.OpenGL.Image
  8. {
  /// <summary>
  /// Converts 32-bit packed texel data between the S8D24 and D24S8 layouts.
  /// Both directions are a byte-wise rotation of every 32-bit word, so they share one implementation.
  /// </summary>
  static class FormatConverter
  {
      /// <summary>
      /// Converts S8D24 data to D24S8 by rotating every 32-bit word left by 8 bits.
      /// </summary>
      /// <param name="data">Source bytes, interpreted as consecutive 32-bit words.</param>
      /// <returns>
      /// A rented buffer of <c>data.Length</c> bytes holding the converted words.
      /// Trailing bytes that do not form a whole word (when the length is not a multiple of 4) are zero.
      /// NOTE(review): the buffer comes from <c>MemoryOwner&lt;byte&gt;.Rent</c>; presumably the caller must dispose it - confirm.
      /// </returns>
      public static MemoryOwner<byte> ConvertS8D24ToD24S8(ReadOnlySpan<byte> data)
      {
          MemoryOwner<byte> outputMemory = MemoryOwner<byte>.Rent(data.Length);

          RotateWords(data, outputMemory.Span, rotateLeft: true);

          return outputMemory;
      }

      /// <summary>
      /// Converts D24S8 data to S8D24 by rotating every 32-bit word right by 8 bits.
      /// </summary>
      /// <param name="data">Source bytes, interpreted as consecutive 32-bit words.</param>
      /// <returns>
      /// A new array of <c>data.Length</c> bytes holding the converted words.
      /// Trailing bytes that do not form a whole word (when the length is not a multiple of 4) are zero.
      /// </returns>
      public static byte[] ConvertD24S8ToS8D24(ReadOnlySpan<byte> data)
      {
          byte[] output = new byte[data.Length];

          RotateWords(data, output, rotateLeft: false);

          return output;
      }

      /// <summary>
      /// Rotates every 32-bit word of <paramref name="source"/> by one byte and writes it to <paramref name="destination"/>.
      /// Uses AVX2 or SSSE3 byte shuffles for the bulk of the data when available and a scalar loop for the rest.
      /// </summary>
      /// <param name="source">Input bytes.</param>
      /// <param name="destination">Output bytes; must be at least as long as <paramref name="source"/>.</param>
      /// <param name="rotateLeft">True to rotate each word left by 8 bits, false to rotate right by 8 bits.</param>
      /// <exception cref="ArgumentException"><paramref name="destination"/> is shorter than <paramref name="source"/>.</exception>
      private static unsafe void RotateWords(ReadOnlySpan<byte> source, Span<byte> destination, bool rotateLeft)
      {
          // The vector loops below store through raw pointers, so the bounds must be checked up front.
          if (destination.Length < source.Length)
          {
              throw new ArgumentException("Destination is shorter than source.", nameof(destination));
          }

          int vectorizedBytes = 0;

          if (Avx2.IsSupported || Ssse3.IsSupported)
          {
              // Per-byte source indices for one 16-byte lane (little-endian words).
              Vector128<byte> laneMask = rotateLeft
                  ? Vector128.Create(
                      (byte)3, (byte)0, (byte)1, (byte)2,
                      (byte)7, (byte)4, (byte)5, (byte)6,
                      (byte)11, (byte)8, (byte)9, (byte)10,
                      (byte)15, (byte)12, (byte)13, (byte)14)
                  : Vector128.Create(
                      (byte)1, (byte)2, (byte)3, (byte)0,
                      (byte)5, (byte)6, (byte)7, (byte)4,
                      (byte)9, (byte)10, (byte)11, (byte)8,
                      (byte)13, (byte)14, (byte)15, (byte)12);

              fixed (byte* pSource = source, pDestination = destination)
              {
                  if (Avx2.IsSupported)
                  {
                      // The 256-bit byte shuffle works independently on each 128-bit lane and only
                      // looks at the low 4 bits of each index, so repeating the lane mask is enough.
                      Vector256<byte> mask = Vector256.Create(laneMask, laneMask);

                      vectorizedBytes = source.Length & ~31;

                      for (int offset = 0; offset < vectorizedBytes; offset += 32)
                      {
                          Vector256<byte> block = Avx.LoadVector256(pSource + offset);
                          Avx.Store(pDestination + offset, Avx2.Shuffle(block, mask));
                      }
                  }
                  else
                  {
                      vectorizedBytes = source.Length & ~15;

                      for (int offset = 0; offset < vectorizedBytes; offset += 16)
                      {
                          Vector128<byte> block = Sse2.LoadVector128(pSource + offset);
                          Sse2.Store(pDestination + offset, Ssse3.Shuffle(block, laneMask));
                      }
                  }
              }
          }

          // Scalar path: handles everything when no SIMD is available, otherwise the words
          // left over after the last full vector.
          ReadOnlySpan<uint> sourceWords = MemoryMarshal.Cast<byte, uint>(source);
          Span<uint> destinationWords = MemoryMarshal.Cast<byte, uint>(destination);

          // Rotating right by 8 is the same as rotating left by 24.
          int rotation = rotateLeft ? 8 : 24;

          for (int index = vectorizedBytes / sizeof(uint); index < sourceWords.Length; index++)
          {
              destinationWords[index] = BitOperations.RotateLeft(sourceWords[index], rotation);
          }

          // Up to 3 trailing bytes do not form a whole word and are never written above.
          // Zero them so the result is deterministic even when the destination is a rented buffer
          // (NOTE(review): assumes rented memory may not be cleared - confirm against MemoryOwner.Rent).
          int wordBytes = sourceWords.Length * sizeof(uint);
          destination.Slice(wordBytes, source.Length - wordBytes).Clear();
      }
  }
  121. }