VectorHelper.cs 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646
  1. using ChocolArm64.State;
  2. using ChocolArm64.Translation;
  3. using System;
  4. using System.Runtime.CompilerServices;
  5. using System.Runtime.Intrinsics;
  6. using System.Runtime.Intrinsics.X86;
  7. namespace ChocolArm64.Instructions
  8. {
  9. static class VectorHelper
  10. {
  /// <summary>
  /// Emits a call to one of two <c>VectorHelper</c> methods, chosen by the register size of the current op.
  /// </summary>
  /// <param name="context">Emitter context that provides the current op and receives the call.</param>
  /// <param name="name64">Method name used when the current op's register size is <c>RegisterSize.Simd64</c>.</param>
  /// <param name="name128">Method name used for every other register size.</param>
  public static void EmitCall(ILEmitterCtx context, string name64, string name128)
  {
      string target = name128;

      if (context.CurrOp.RegisterSize == RegisterSize.Simd64)
      {
          target = name64;
      }

      context.EmitCall(typeof(VectorHelper), target);
  }
  /// <summary>
  /// Emits a call to the <c>VectorHelper</c> method with the given name.
  /// </summary>
  /// <param name="context">Emitter context that receives the call.</param>
  /// <param name="mthdName">Name of the <c>VectorHelper</c> method to call.</param>
  public static void EmitCall(ILEmitterCtx context, string mthdName)
      => context.EmitCall(typeof(VectorHelper), mthdName);
  /// <summary>
  /// Converts a <see cref="float"/> to <see cref="int"/> with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>
  /// 0 for NaN; <c>int.MaxValue</c> / <c>int.MinValue</c> when the value is at or beyond
  /// the respective bound; otherwise the value truncated by the cast.
  /// </returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static int SatF32ToS32(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= int.MaxValue)
      {
          return int.MaxValue;
      }

      if (value <= int.MinValue)
      {
          return int.MinValue;
      }

      return (int)value;
  }
  /// <summary>
  /// Converts a <see cref="float"/> to <see cref="long"/> with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>
  /// 0 for NaN; <c>long.MaxValue</c> / <c>long.MinValue</c> when the value is at or beyond
  /// the respective bound; otherwise the value truncated by the cast.
  /// </returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static long SatF32ToS64(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= long.MaxValue)
      {
          return long.MaxValue;
      }

      if (value <= long.MinValue)
      {
          return long.MinValue;
      }

      return (long)value;
  }
  /// <summary>
  /// Converts a <see cref="float"/> to <see cref="uint"/> with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>
  /// 0 for NaN and for values at or below zero; <c>uint.MaxValue</c> when the value is at or
  /// above that bound; otherwise the value truncated by the cast.
  /// </returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static uint SatF32ToU32(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= uint.MaxValue)
      {
          return uint.MaxValue;
      }

      if (value <= uint.MinValue)
      {
          return uint.MinValue;
      }

      return (uint)value;
  }
  /// <summary>
  /// Converts a <see cref="float"/> to <see cref="ulong"/> with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>
  /// 0 for NaN and for values at or below zero; <c>ulong.MaxValue</c> when the value is at or
  /// above that bound; otherwise the value truncated by the cast.
  /// </returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static ulong SatF32ToU64(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= ulong.MaxValue)
      {
          return ulong.MaxValue;
      }

      if (value <= ulong.MinValue)
      {
          return ulong.MinValue;
      }

      return (ulong)value;
  }
  /// <summary>
  /// Converts a <see cref="double"/> to <see cref="int"/> with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>
  /// 0 for NaN; <c>int.MaxValue</c> / <c>int.MinValue</c> when the value is at or beyond
  /// the respective bound; otherwise the value truncated by the cast.
  /// </returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static int SatF64ToS32(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= int.MaxValue)
      {
          return int.MaxValue;
      }

      if (value <= int.MinValue)
      {
          return int.MinValue;
      }

      return (int)value;
  }
  /// <summary>
  /// Converts a <see cref="double"/> to <see cref="long"/> with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>
  /// 0 for NaN; <c>long.MaxValue</c> / <c>long.MinValue</c> when the value is at or beyond
  /// the respective bound; otherwise the value truncated by the cast.
  /// </returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static long SatF64ToS64(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= long.MaxValue)
      {
          return long.MaxValue;
      }

      if (value <= long.MinValue)
      {
          return long.MinValue;
      }

      return (long)value;
  }
  /// <summary>
  /// Converts a <see cref="double"/> to <see cref="uint"/> with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>
  /// 0 for NaN and for values at or below zero; <c>uint.MaxValue</c> when the value is at or
  /// above that bound; otherwise the value truncated by the cast.
  /// </returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static uint SatF64ToU32(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= uint.MaxValue)
      {
          return uint.MaxValue;
      }

      if (value <= uint.MinValue)
      {
          return uint.MinValue;
      }

      return (uint)value;
  }
  /// <summary>
  /// Converts a <see cref="double"/> to <see cref="ulong"/> with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>
  /// 0 for NaN and for values at or below zero; <c>ulong.MaxValue</c> when the value is at or
  /// above that bound; otherwise the value truncated by the cast.
  /// </returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static ulong SatF64ToU64(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= ulong.MaxValue)
      {
          return ulong.MaxValue;
      }

      if (value <= ulong.MinValue)
      {
          return ulong.MinValue;
      }

      return (ulong)value;
  }
  /// <summary>
  /// Rounds a <see cref="double"/> to an integral value using the rounding mode reported by the thread state.
  /// </summary>
  /// <param name="value">Value to round.</param>
  /// <param name="state">Thread state queried through <c>FPRoundingMode()</c>.</param>
  /// <returns>The rounded value.</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static double Round(double value, CpuThreadState state)
  {
      switch (state.FPRoundingMode())
      {
          case RoundMode.ToNearest:
              return Math.Round(value); // Midpoints go to the even neighbour.

          case RoundMode.TowardsPlusInfinity:
              return Math.Ceiling(value);

          case RoundMode.TowardsMinusInfinity:
              return Math.Floor(value);

          default: // RoundMode.TowardsZero
              return Math.Truncate(value);
      }
  }
  /// <summary>
  /// Rounds a <see cref="float"/> to an integral value using the rounding mode reported by the thread state.
  /// </summary>
  /// <param name="value">Value to round.</param>
  /// <param name="state">Thread state queried through <c>FPRoundingMode()</c>.</param>
  /// <returns>The rounded value.</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static float RoundF(float value, CpuThreadState state)
  {
      switch (state.FPRoundingMode())
      {
          case RoundMode.ToNearest:
              return MathF.Round(value); // Midpoints go to the even neighbour.

          case RoundMode.TowardsPlusInfinity:
              return MathF.Ceiling(value);

          case RoundMode.TowardsMinusInfinity:
              return MathF.Floor(value);

          default: // RoundMode.TowardsZero
              return MathF.Truncate(value);
      }
  }
  /// <summary>
  /// Scalar double-precision rounding through SSE4.1, using the rounding mode reported by the thread state.
  /// </summary>
  /// <param name="upper">Forwarded as the first operand of the two-operand SSE4.1 scalar rounding intrinsic.</param>
  /// <param name="value">Forwarded as the second operand; holds the scalar to round.</param>
  /// <param name="state">Thread state queried through <c>FPRoundingMode()</c>.</param>
  /// <returns>The vector produced by the selected intrinsic.</returns>
  /// <exception cref="PlatformNotSupportedException">SSE4.1 is not available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<double> Sse41ScalarRound(Vector128<double> upper, Vector128<double> value, CpuThreadState state)
  {
      if (!Sse41.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      RoundMode mode = state.FPRoundingMode();

      if (mode == RoundMode.ToNearest)
      {
          return Sse41.RoundToNearestIntegerScalar(upper, value); // Ties to even.
      }

      if (mode == RoundMode.TowardsPlusInfinity)
      {
          return Sse41.RoundToPositiveInfinityScalar(upper, value);
      }

      if (mode == RoundMode.TowardsMinusInfinity)
      {
          return Sse41.RoundToNegativeInfinityScalar(upper, value);
      }

      // Remaining mode: RoundMode.TowardsZero.
      return Sse41.RoundToZeroScalar(upper, value);
  }
  /// <summary>
  /// Scalar single-precision rounding through SSE4.1, using the rounding mode reported by the thread state.
  /// </summary>
  /// <param name="upper">Forwarded as the first operand of the two-operand SSE4.1 scalar rounding intrinsic.</param>
  /// <param name="value">Forwarded as the second operand; holds the scalar to round.</param>
  /// <param name="state">Thread state queried through <c>FPRoundingMode()</c>.</param>
  /// <returns>The vector produced by the selected intrinsic.</returns>
  /// <exception cref="PlatformNotSupportedException">SSE4.1 is not available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> Sse41ScalarRoundF(Vector128<float> upper, Vector128<float> value, CpuThreadState state)
  {
      if (!Sse41.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      RoundMode mode = state.FPRoundingMode();

      if (mode == RoundMode.ToNearest)
      {
          return Sse41.RoundToNearestIntegerScalar(upper, value); // Ties to even.
      }

      if (mode == RoundMode.TowardsPlusInfinity)
      {
          return Sse41.RoundToPositiveInfinityScalar(upper, value);
      }

      if (mode == RoundMode.TowardsMinusInfinity)
      {
          return Sse41.RoundToNegativeInfinityScalar(upper, value);
      }

      // Remaining mode: RoundMode.TowardsZero.
      return Sse41.RoundToZeroScalar(upper, value);
  }
  /// <summary>
  /// Packed double-precision rounding through SSE4.1, using the rounding mode reported by the thread state.
  /// </summary>
  /// <param name="value">Vector whose elements are rounded.</param>
  /// <param name="state">Thread state queried through <c>FPRoundingMode()</c>.</param>
  /// <returns>The vector produced by the selected intrinsic.</returns>
  /// <exception cref="PlatformNotSupportedException">SSE4.1 is not available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<double> Sse41VectorRound(Vector128<double> value, CpuThreadState state)
  {
      if (!Sse41.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      RoundMode mode = state.FPRoundingMode();

      if (mode == RoundMode.ToNearest)
      {
          return Sse41.RoundToNearestInteger(value); // Ties to even.
      }

      if (mode == RoundMode.TowardsPlusInfinity)
      {
          return Sse41.RoundToPositiveInfinity(value);
      }

      if (mode == RoundMode.TowardsMinusInfinity)
      {
          return Sse41.RoundToNegativeInfinity(value);
      }

      // Remaining mode: RoundMode.TowardsZero.
      return Sse41.RoundToZero(value);
  }
  /// <summary>
  /// Packed single-precision rounding through SSE4.1, using the rounding mode reported by the thread state.
  /// </summary>
  /// <param name="value">Vector whose elements are rounded.</param>
  /// <param name="state">Thread state queried through <c>FPRoundingMode()</c>.</param>
  /// <returns>The vector produced by the selected intrinsic.</returns>
  /// <exception cref="PlatformNotSupportedException">SSE4.1 is not available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> Sse41VectorRoundF(Vector128<float> value, CpuThreadState state)
  {
      if (!Sse41.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      RoundMode mode = state.FPRoundingMode();

      if (mode == RoundMode.ToNearest)
      {
          return Sse41.RoundToNearestInteger(value); // Ties to even.
      }

      if (mode == RoundMode.TowardsPlusInfinity)
      {
          return Sse41.RoundToPositiveInfinity(value);
      }

      if (mode == RoundMode.TowardsMinusInfinity)
      {
          return Sse41.RoundToNegativeInfinity(value);
      }

      // Remaining mode: RoundMode.TowardsZero.
      return Sse41.RoundToZero(value);
  }
  /// <summary>
  /// Table lookup over a one-vector table, processing the low 8 index bytes of <paramref name="vector"/>.
  /// </summary>
  public static Vector128<float> Tbl1_V64(Vector128<float> vector, Vector128<float> tb0)
      => Tbl(vector, 8, tb0);
  /// <summary>
  /// Table lookup over a one-vector table, processing all 16 index bytes of <paramref name="vector"/>.
  /// </summary>
  public static Vector128<float> Tbl1_V128(Vector128<float> vector, Vector128<float> tb0)
      => Tbl(vector, 16, tb0);
  /// <summary>
  /// Table lookup over a two-vector table, processing the low 8 index bytes of <paramref name="vector"/>.
  /// </summary>
  public static Vector128<float> Tbl2_V64(Vector128<float> vector, Vector128<float> tb0, Vector128<float> tb1)
      => Tbl(vector, 8, tb0, tb1);
  /// <summary>
  /// Table lookup over a two-vector table, processing all 16 index bytes of <paramref name="vector"/>.
  /// </summary>
  public static Vector128<float> Tbl2_V128(Vector128<float> vector, Vector128<float> tb0, Vector128<float> tb1)
      => Tbl(vector, 16, tb0, tb1);
  /// <summary>
  /// Table lookup over a three-vector table, processing the low 8 index bytes of <paramref name="vector"/>.
  /// </summary>
  public static Vector128<float> Tbl3_V64(
      Vector128<float> vector,
      Vector128<float> tb0,
      Vector128<float> tb1,
      Vector128<float> tb2)
      => Tbl(vector, 8, tb0, tb1, tb2);
  /// <summary>
  /// Table lookup over a three-vector table, processing all 16 index bytes of <paramref name="vector"/>.
  /// </summary>
  public static Vector128<float> Tbl3_V128(
      Vector128<float> vector,
      Vector128<float> tb0,
      Vector128<float> tb1,
      Vector128<float> tb2)
      => Tbl(vector, 16, tb0, tb1, tb2);
  /// <summary>
  /// Table lookup over a four-vector table, processing the low 8 index bytes of <paramref name="vector"/>.
  /// </summary>
  public static Vector128<float> Tbl4_V64(
      Vector128<float> vector,
      Vector128<float> tb0,
      Vector128<float> tb1,
      Vector128<float> tb2,
      Vector128<float> tb3)
      => Tbl(vector, 8, tb0, tb1, tb2, tb3);
  /// <summary>
  /// Table lookup over a four-vector table, processing all 16 index bytes of <paramref name="vector"/>.
  /// </summary>
  public static Vector128<float> Tbl4_V128(
      Vector128<float> vector,
      Vector128<float> tb0,
      Vector128<float> tb1,
      Vector128<float> tb2,
      Vector128<float> tb3)
      => Tbl(vector, 16, tb0, tb1, tb2, tb3);
  /// <summary>
  /// Byte-wise table lookup: each of the first <paramref name="bytes"/> bytes of
  /// <paramref name="vector"/> is used as an index into the table formed by the
  /// <paramref name="tb"/> vectors laid end to end (16 bytes each).
  /// </summary>
  /// <param name="vector">Vector holding the index bytes.</param>
  /// <param name="bytes">Number of low result bytes to compute (callers in this class pass 8 or 16).</param>
  /// <param name="tb">Table vectors, in order.</param>
  /// <returns>
  /// A vector whose byte <c>i</c> is the selected table byte, or zero when the index is outside
  /// the table. Bytes at or beyond <paramref name="bytes"/> are zero as well.
  /// </returns>
  private static Vector128<float> Tbl(Vector128<float> vector, int bytes, params Vector128<float>[] tb)
  {
      // Starts zeroed, so out-of-range indices and untouched upper bytes read as 0.
      Vector128<float> res = new Vector128<float>();

      int tableLength = tb.Length * 16;

      for (byte index = 0; index < bytes; index++)
      {
          byte tblIdx = (byte)VectorExtractIntZx(vector, index, 0);

          if (tblIdx < tableLength)
          {
              // Read the entry directly from its table vector instead of first flattening
              // the whole table into a temporary array: bits 7..4 of the index select the
              // table vector, bits 3..0 select the byte inside it.
              ulong entry = VectorExtractIntZx(tb[tblIdx >> 4], (byte)(tblIdx & 15), 0);

              res = VectorInsertInt(entry, res, index, 0);
          }
      }

      return res;
  }
  /// <summary>
  /// Reads the 64-bit element at <paramref name="index"/> and reinterprets its bits as a <see cref="double"/>.
  /// </summary>
  /// <param name="vector">Source vector.</param>
  /// <param name="index">64-bit element index.</param>
  /// <returns>The element's bit pattern viewed as a double.</returns>
  /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static double VectorExtractDouble(Vector128<float> vector, byte index)
  {
      long bits;

      if (Sse41.IsSupported)
      {
          bits = Sse41.Extract(Sse.StaticCast<float, long>(vector), index);
      }
      else if (Sse2.IsSupported)
      {
          // Fallback goes through the 16-bit based extraction helper (size 3 = 64-bit element).
          bits = (long)VectorExtractIntZx(vector, index, 3);
      }
      else
      {
          throw new PlatformNotSupportedException();
      }

      return BitConverter.Int64BitsToDouble(bits);
  }
  /// <summary>
  /// Extracts an integer element from the vector and sign-extends it to 64 bits.
  /// </summary>
  /// <param name="vector">Source vector.</param>
  /// <param name="index">Element index, counted in elements of the selected size.</param>
  /// <param name="size">Element size selector: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
  /// <returns>The sign-extended element.</returns>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not 0..3.</exception>
  /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static long VectorExtractIntSx(Vector128<float> vector, byte index, int size)
  {
      if (Sse41.IsSupported)
      {
          if (size == 0)
          {
              return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);
          }

          if (size == 1)
          {
              return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);
          }

          if (size == 2)
          {
              return Sse41.Extract(Sse.StaticCast<float, int>(vector), index);
          }

          if (size == 3)
          {
              return Sse41.Extract(Sse.StaticCast<float, long>(vector), index);
          }

          throw new ArgumentOutOfRangeException(nameof(size));
      }

      if (Sse2.IsSupported)
      {
          if (size < 0 || size > 3)
          {
              throw new ArgumentOutOfRangeException(nameof(size));
          }

          // Take the zero-extended element, then narrow it so that the implicit
          // widening back to long performs the sign extension.
          ulong raw = VectorExtractIntZx(vector, index, size);

          if (size == 0)
          {
              return (sbyte)raw;
          }

          if (size == 1)
          {
              return (short)raw;
          }

          if (size == 2)
          {
              return (int)raw;
          }

          return (long)raw;
      }

      throw new PlatformNotSupportedException();
  }
  /// <summary>
  /// Extracts an integer element from the vector and zero-extends it to 64 bits.
  /// </summary>
  /// <param name="vector">Source vector.</param>
  /// <param name="index">Element index, counted in elements of the selected size.</param>
  /// <param name="size">Element size selector: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
  /// <returns>The zero-extended element.</returns>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not 0..3.</exception>
  /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static ulong VectorExtractIntZx(Vector128<float> vector, byte index, int size)
  {
      if (Sse41.IsSupported)
      {
          if (size == 0)
          {
              return Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);
          }

          if (size == 1)
          {
              return Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);
          }

          if (size == 2)
          {
              return Sse41.Extract(Sse.StaticCast<float, uint>(vector), index);
          }

          if (size == 3)
          {
              return Sse41.Extract(Sse.StaticCast<float, ulong>(vector), index);
          }

          throw new ArgumentOutOfRangeException(nameof(size));
      }

      if (Sse2.IsSupported)
      {
          // SSE2 only offers a 16-bit extract, so every element size is assembled from 16-bit words.
          Vector128<ushort> words = Sse.StaticCast<float, ushort>(vector);

          // Index of the first 16-bit word that belongs to the requested element.
          int wordIdx = size == 0 ? index >> 1 : index << (size - 1);

          ushort w0 = Sse2.Extract(words, (byte)wordIdx);

          if (size == 0)
          {
              // Odd byte indices live in the upper half of the word.
              return (byte)(w0 >> ((index & 1) * 8));
          }

          if (size == 1)
          {
              return w0;
          }

          if (size == 2 || size == 3)
          {
              ushort w1 = Sse2.Extract(words, (byte)(wordIdx + 1));

              if (size == 2)
              {
                  return (uint)(w0 | (w1 << 16));
              }

              ushort w2 = Sse2.Extract(words, (byte)(wordIdx + 2));
              ushort w3 = Sse2.Extract(words, (byte)(wordIdx + 3));

              return (ulong)w0 |
                     ((ulong)w1 << 16) |
                     ((ulong)w2 << 32) |
                     ((ulong)w3 << 48);
          }

          throw new ArgumentOutOfRangeException(nameof(size));
      }

      throw new PlatformNotSupportedException();
  }
  /// <summary>
  /// Reads the single-precision element at <paramref name="index"/>.
  /// </summary>
  /// <param name="vector">Source vector.</param>
  /// <param name="index">32-bit element index.</param>
  /// <returns>The selected element.</returns>
  /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static float VectorExtractSingle(Vector128<float> vector, byte index)
  {
      if (Sse41.IsSupported)
      {
          return Sse41.Extract(vector, index);
      }

      if (!Sse2.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      // SSE2 fallback: rebuild the 32-bit pattern from its two 16-bit halves.
      Vector128<ushort> halves = Sse.StaticCast<float, ushort>(vector);

      int lowIdx = index * 2;

      int lowBits = Sse2.Extract(halves, (byte)lowIdx);
      int highBits = Sse2.Extract(halves, (byte)(lowIdx + 1));

      return BitConverter.Int32BitsToSingle(lowBits | (highBits << 16));
  }
  /// <summary>
  /// Writes the bit pattern of <paramref name="value"/> into the 64-bit element at <paramref name="index"/>.
  /// </summary>
  /// <param name="value">Double whose bits are stored.</param>
  /// <param name="vector">Vector to update.</param>
  /// <param name="index">64-bit element index.</param>
  /// <returns>The vector returned by <c>VectorInsertInt</c> for a 64-bit element (size 3).</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> VectorInsertDouble(double value, Vector128<float> vector, byte index)
  {
      long bits = BitConverter.DoubleToInt64Bits(value);

      return VectorInsertInt((ulong)bits, vector, index, 3);
  }
  /// <summary>
  /// Writes the low bits of <paramref name="value"/> into one integer element of the vector.
  /// </summary>
  /// <param name="value">Value to store; truncated to the element width.</param>
  /// <param name="vector">Vector to update.</param>
  /// <param name="index">Element index, counted in elements of the selected size.</param>
  /// <param name="size">Element size selector: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
  /// <returns>The vector with the element replaced.</returns>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not 0..3.</exception>
  /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> VectorInsertInt(ulong value, Vector128<float> vector, byte index, int size)
  {
      if (Sse41.IsSupported)
      {
          if (size == 0)
          {
              Vector128<byte> asBytes = Sse.StaticCast<float, byte>(vector);

              return Sse.StaticCast<byte, float>(Sse41.Insert(asBytes, (byte)value, index));
          }

          if (size == 1)
          {
              Vector128<ushort> asWords = Sse.StaticCast<float, ushort>(vector);

              return Sse.StaticCast<ushort, float>(Sse2.Insert(asWords, (ushort)value, index));
          }

          if (size == 2)
          {
              Vector128<uint> asDwords = Sse.StaticCast<float, uint>(vector);

              return Sse.StaticCast<uint, float>(Sse41.Insert(asDwords, (uint)value, index));
          }

          if (size == 3)
          {
              Vector128<ulong> asQwords = Sse.StaticCast<float, ulong>(vector);

              return Sse.StaticCast<ulong, float>(Sse41.Insert(asQwords, value, index));
          }

          throw new ArgumentOutOfRangeException(nameof(size));
      }

      if (Sse2.IsSupported)
      {
          // SSE2 only offers a 16-bit insert, so every element size is written as 16-bit words.
          Vector128<ushort> words = Sse.StaticCast<float, ushort>(vector);

          // Index of the first 16-bit word that belongs to the target element.
          int wordIdx = size == 0 ? index >> 1 : index << (size - 1);

          if (size == 0)
          {
              // Read-modify-write of the word that contains the target byte.
              ushort current = Sse2.Extract(words, (byte)wordIdx);

              int shift = (index & 1) * 8;

              // Keeps the byte that is NOT being replaced.
              ushort keepMask = (ushort)(0xff00 >> shift);

              ushort merged = (ushort)((current & keepMask) | ((byte)value << shift));

              return Sse.StaticCast<ushort, float>(Sse2.Insert(words, merged, (byte)wordIdx));
          }

          if (size == 1)
          {
              return Sse.StaticCast<ushort, float>(Sse2.Insert(words, (ushort)value, index));
          }

          if (size == 2 || size == 3)
          {
              words = Sse2.Insert(words, (ushort)value, (byte)wordIdx);
              words = Sse2.Insert(words, (ushort)(value >> 16), (byte)(wordIdx + 1));

              if (size == 3)
              {
                  words = Sse2.Insert(words, (ushort)(value >> 32), (byte)(wordIdx + 2));
                  words = Sse2.Insert(words, (ushort)(value >> 48), (byte)(wordIdx + 3));
              }

              return Sse.StaticCast<ushort, float>(words);
          }

          throw new ArgumentOutOfRangeException(nameof(size));
      }

      throw new PlatformNotSupportedException();
  }
  /// <summary>
  /// Writes <paramref name="value"/> into the single-precision element at <paramref name="index"/>.
  /// </summary>
  /// <param name="value">Value to store.</param>
  /// <param name="vector">Vector to update.</param>
  /// <param name="index">32-bit element index; the SSE4.1 path accepts 0..3 only.</param>
  /// <returns>The vector with the element replaced.</returns>
  /// <exception cref="ArgumentOutOfRangeException">SSE4.1 path and <paramref name="index"/> is above 3.</exception>
  /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> VectorInsertSingle(float value, Vector128<float> vector, byte index)
  {
      if (Sse41.IsSupported)
      {
          // Note: the explicit if/else if chain with literal immediates is required so that
          // the JIT can produce a single INSERTPS instruction instead of the jump table fallback.
          if (index == 0)
          {
              return Sse41.Insert(vector, value, 0x00);
          }
          else if (index == 1)
          {
              return Sse41.Insert(vector, value, 0x10);
          }
          else if (index == 2)
          {
              return Sse41.Insert(vector, value, 0x20);
          }
          else if (index == 3)
          {
              return Sse41.Insert(vector, value, 0x30);
          }

          throw new ArgumentOutOfRangeException(nameof(index));
      }

      if (Sse2.IsSupported)
      {
          // SSE2 fallback: store the 32-bit pattern as two 16-bit halves.
          int bits = BitConverter.SingleToInt32Bits(value);

          int lowIdx = index * 2;

          Vector128<ushort> halves = Sse.StaticCast<float, ushort>(vector);

          halves = Sse2.Insert(halves, (ushort)bits, (byte)lowIdx);
          halves = Sse2.Insert(halves, (ushort)(bits >> 16), (byte)(lowIdx + 1));

          return Sse.StaticCast<ushort, float>(halves);
      }

      throw new PlatformNotSupportedException();
  }
  /// <summary>
  /// Inserts <paramref name="value"/> through the SSE4.1 insert intrinsic with control byte
  /// <c>0b1110</c>, whose zero mask clears the upper elements of the result.
  /// </summary>
  /// <param name="value">Scalar to insert.</param>
  /// <param name="vector">Vector passed as the destination operand of the insert.</param>
  /// <returns>The vector produced by the insert.</returns>
  /// <exception cref="PlatformNotSupportedException">SSE4.1 is not available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> Sse41VectorInsertScalarSingle(float value, Vector128<float> vector)
  {
      // Explicit guard, matching the other Sse41* helpers in this class.
      if (!Sse41.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      // Note: 0b1110 is the mask to zero the upper bits.
      return Sse41.Insert(vector, value, 0b1110);
  }
  /// <summary>
  /// Returns the vector produced by <c>Sse.SetZeroVector128()</c>.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> VectorSingleZero()
  {
      if (!Sse.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      return Sse.SetZeroVector128();
  }
  561. }
  562. }