SoftFallback.cs 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694
  1. using ARMeilleure.State;
  2. using System;
  3. namespace ARMeilleure.Instructions
  4. {
  5. static class SoftFallback
  6. {
  7. #region "ShrImm64"
  /// <summary>
  /// Shifts a signed 64-bit value right, optionally adding a rounding constant before the shift.
  /// </summary>
  /// <param name="value">Signed value to shift.</param>
  /// <param name="roundConst">Zero for a plain shift; otherwise the constant added before shifting
  /// (callers are expected to pass 1L shifted left by shift - 1).</param>
  /// <param name="shift">Shift amount; anything above 63 is treated as a shift by 64.</param>
  /// <returns>The shifted, optionally rounded, value.</returns>
  public static long SignedShrImm64(long value, long roundConst, int shift)
  {
      bool fullWidth = shift > 63; // i.e. shift == 64

      if (roundConst == 0L)
      {
          if (fullWidth)
          {
              // Only the sign survives a shift by the full width.
              return value < 0L ? -1L : 0L;
          }

          return value >> shift;
      }

      if (fullWidth)
      {
          return 0L;
      }

      long rounded = value + roundConst;

      // Sign bit set here means value was non-negative but the sum wrapped to negative.
      bool wrapped = (~value & (value ^ rounded)) < 0L;

      // After a wrap the sum must be read as unsigned, hence the logical shift.
      return wrapped ? (long)((ulong)rounded >> shift) : rounded >> shift;
  }
  /// <summary>
  /// Shifts an unsigned 64-bit value right, optionally adding a rounding constant before the shift.
  /// </summary>
  /// <param name="value">Unsigned value to shift.</param>
  /// <param name="roundConst">Zero for a plain shift; otherwise the constant added before shifting
  /// (callers are expected to pass 1L shifted left by shift - 1).</param>
  /// <param name="shift">Shift amount; anything above 63 is treated as a shift by 64.</param>
  /// <returns>The shifted, optionally rounded, value.</returns>
  public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
  {
      bool fullWidth = shift > 63; // i.e. shift == 64

      if (roundConst == 0L)
      {
          return fullWidth ? 0UL : value >> shift;
      }

      ulong bias = (ulong)roundConst;
      ulong sum = value + bias;

      // The addition carried out of bit 63 when the wrapped sum is below both operands.
      bool carried = (sum < value) && (sum < bias);

      if (fullWidth)
      {
          // Only the carry bit remains after shifting by the full width.
          return carried ? 1UL : 0UL;
      }

      ulong shifted = sum >> shift;

      // Re-insert the lost carry (bit 64) at the position it lands on after the shift.
      return carried ? shifted | (0x8000000000000000UL >> (shift - 1)) : shifted;
  }
  88. #endregion
  89. #region "Rounding"
  /// <summary>
  /// Rounds <paramref name="value"/> to an integral value using the rounding mode reported by
  /// the FPCR of the current execution context.
  /// </summary>
  /// <param name="value">Value to round.</param>
  /// <returns>The rounded value, still as a double.</returns>
  public static double Round(double value)
  {
      FPRoundingMode roundMode = NativeInterface.GetContext().Fpcr.GetRoundingMode();

      if (roundMode == FPRoundingMode.ToNearest)
      {
          return Math.Round(value); // Midpoints go to the even neighbour.
      }

      if (roundMode == FPRoundingMode.TowardsPlusInfinity)
      {
          return Math.Ceiling(value);
      }

      if (roundMode == FPRoundingMode.TowardsMinusInfinity)
      {
          return Math.Floor(value);
      }

      // Any remaining mode (expected: FPRoundingMode.TowardsZero) truncates.
      return Math.Truncate(value);
  }
  /// <summary>
  /// Single-precision rounding to an integral value, using the rounding mode reported by the
  /// FPCR of the current execution context.
  /// </summary>
  /// <param name="value">Value to round.</param>
  /// <returns>The rounded value, still as a float.</returns>
  public static float RoundF(float value)
  {
      FPRoundingMode roundMode = NativeInterface.GetContext().Fpcr.GetRoundingMode();

      if (roundMode == FPRoundingMode.ToNearest)
      {
          return MathF.Round(value); // Midpoints go to the even neighbour.
      }

      if (roundMode == FPRoundingMode.TowardsPlusInfinity)
      {
          return MathF.Ceiling(value);
      }

      if (roundMode == FPRoundingMode.TowardsMinusInfinity)
      {
          return MathF.Floor(value);
      }

      // Any remaining mode (expected: FPRoundingMode.TowardsZero) truncates.
      return MathF.Truncate(value);
  }
  /// <summary>Rounds a float with <c>RoundF</c>, then converts it to a saturated signed 32-bit integer.</summary>
  public static int FloatToInt32(float value) => SatF32ToS32(RoundF(value));
  /// <summary>Rounds a double with <c>Round</c>, then converts it to a saturated signed 32-bit integer.</summary>
  public static int DoubleToInt32(double value) => SatF64ToS32(Round(value));
  /// <summary>Rounds a float with <c>RoundF</c>, then converts it to a saturated unsigned 32-bit integer.</summary>
  public static uint FloatToUInt32(float value) => SatF32ToU32(RoundF(value));
  /// <summary>Rounds a double with <c>Round</c>, then converts it to a saturated unsigned 32-bit integer.</summary>
  public static uint DoubleToUInt32(double value) => SatF64ToU32(Round(value));
  148. #endregion
  149. #region "Saturation"
  /// <summary>
  /// Converts a float to a signed 32-bit integer, clamping out-of-range values to the integer
  /// limits and mapping NaN to zero.
  /// </summary>
  public static int SatF32ToS32(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= int.MaxValue)
      {
          return int.MaxValue;
      }

      if (value <= int.MinValue)
      {
          return int.MinValue;
      }

      return (int)value;
  }
  /// <summary>
  /// Converts a float to a signed 64-bit integer, clamping out-of-range values to the integer
  /// limits and mapping NaN to zero.
  /// </summary>
  public static long SatF32ToS64(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= long.MaxValue)
      {
          return long.MaxValue;
      }

      if (value <= long.MinValue)
      {
          return long.MinValue;
      }

      return (long)value;
  }
  /// <summary>
  /// Converts a float to an unsigned 32-bit integer, clamping out-of-range values to the integer
  /// limits and mapping NaN to zero.
  /// </summary>
  public static uint SatF32ToU32(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= uint.MaxValue)
      {
          return uint.MaxValue;
      }

      if (value <= uint.MinValue)
      {
          return uint.MinValue;
      }

      return (uint)value;
  }
  /// <summary>
  /// Converts a float to an unsigned 64-bit integer, clamping out-of-range values to the integer
  /// limits and mapping NaN to zero.
  /// </summary>
  public static ulong SatF32ToU64(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= ulong.MaxValue)
      {
          return ulong.MaxValue;
      }

      if (value <= ulong.MinValue)
      {
          return ulong.MinValue;
      }

      return (ulong)value;
  }
  /// <summary>
  /// Converts a double to a signed 32-bit integer, clamping out-of-range values to the integer
  /// limits and mapping NaN to zero.
  /// </summary>
  public static int SatF64ToS32(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= int.MaxValue)
      {
          return int.MaxValue;
      }

      if (value <= int.MinValue)
      {
          return int.MinValue;
      }

      return (int)value;
  }
  /// <summary>
  /// Converts a double to a signed 64-bit integer, clamping out-of-range values to the integer
  /// limits and mapping NaN to zero.
  /// </summary>
  public static long SatF64ToS64(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= long.MaxValue)
      {
          return long.MaxValue;
      }

      if (value <= long.MinValue)
      {
          return long.MinValue;
      }

      return (long)value;
  }
  /// <summary>
  /// Converts a double to an unsigned 32-bit integer, clamping out-of-range values to the integer
  /// limits and mapping NaN to zero.
  /// </summary>
  public static uint SatF64ToU32(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= uint.MaxValue)
      {
          return uint.MaxValue;
      }

      if (value <= uint.MinValue)
      {
          return uint.MinValue;
      }

      return (uint)value;
  }
  /// <summary>
  /// Converts a double to an unsigned 64-bit integer, clamping out-of-range values to the integer
  /// limits and mapping NaN to zero.
  /// </summary>
  public static ulong SatF64ToU64(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= ulong.MaxValue)
      {
          return ulong.MaxValue;
      }

      if (value <= ulong.MinValue)
      {
          return ulong.MinValue;
      }

      return (ulong)value;
  }
  198. #endregion
  199. #region "Count"
  /// <summary>
  /// Counts how many bits directly below the top bit of a <paramref name="size"/>-bit value are
  /// equal to that top bit.
  /// </summary>
  /// <param name="value">Value to inspect; only its low <paramref name="size"/> bits are examined.</param>
  /// <param name="size">Operand width: 8, 16, 32 or 64 (SIMD&amp;FP or Base Inst.).</param>
  /// <returns>The count, at most <c>size - 1</c>.</returns>
  public static ulong CountLeadingSigns(ulong value, int size)
  {
      // Bit i of this word is set exactly when bits i and i + 1 of the input differ.
      ulong transitions = value ^ (value >> 1);

      int top = size - 2;

      for (int run = 0; run <= top; run++)
      {
          // Walk downwards from the bit just below the top bit until the first difference.
          if (((transitions >> (top - run)) & 1UL) != 0UL)
          {
              return (ulong)run;
          }
      }

      // Every examined bit matched the top bit.
      return (ulong)(size - 1);
  }
  // Number of leading zero bits of each 4-bit value, indexed by that value (0 -> 4, 1 -> 3, ..., 8..15 -> 0).
  private static ReadOnlySpan<byte> ClzNibbleTbl => new byte[] { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };

  /// <summary>
  /// Counts the leading zero bits of the low <paramref name="size"/> bits of <paramref name="value"/>,
  /// examining one nibble at a time from the top.
  /// </summary>
  /// <param name="value">Value to inspect.</param>
  /// <param name="size">Operand width: 8, 16, 32 or 64 (SIMD&amp;FP or Base Inst.).</param>
  /// <returns>
  /// The number of leading zeros, or <paramref name="size"/> when the examined bits are all zero.
  /// </returns>
  public static ulong CountLeadingZeros(ulong value, int size)
  {
      if (value == 0ul)
      {
          return (ulong)size;
      }

      int nibbleIdx = size;
      int preCount, count = 0;

      do
      {
          nibbleIdx -= 4;
          preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111];
          count += preCount;
      }
      // Also stop at the lowest nibble. Without this bound, a value whose low `size` bits are
      // zero but which has higher bits set would drive nibbleIdx negative; the shift count then
      // wraps and the loop keeps counting unrelated nibbles.
      while (preCount == 4 && nibbleIdx > 0);

      return (ulong)count;
  }
  231. #endregion
  232. #region "Table"
  /// <summary>Byte table lookup over one table vector; bytes with an out-of-range index are zero.</summary>
  public static V128 Tbl1(V128 vector, int bytes, V128 tb0)
      => TblOrTbx(default, vector, bytes, tb0);

  /// <summary>Byte table lookup over two table vectors; bytes with an out-of-range index are zero.</summary>
  public static V128 Tbl2(V128 vector, int bytes, V128 tb0, V128 tb1)
      => TblOrTbx(default, vector, bytes, tb0, tb1);

  /// <summary>Byte table lookup over three table vectors; bytes with an out-of-range index are zero.</summary>
  public static V128 Tbl3(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2)
      => TblOrTbx(default, vector, bytes, tb0, tb1, tb2);

  /// <summary>Byte table lookup over four table vectors; bytes with an out-of-range index are zero.</summary>
  public static V128 Tbl4(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
      => TblOrTbx(default, vector, bytes, tb0, tb1, tb2, tb3);

  /// <summary>Byte table lookup over one table vector; bytes with an out-of-range index keep the value from <paramref name="dest"/>.</summary>
  public static V128 Tbx1(V128 dest, V128 vector, int bytes, V128 tb0)
      => TblOrTbx(dest, vector, bytes, tb0);

  /// <summary>Byte table lookup over two table vectors; bytes with an out-of-range index keep the value from <paramref name="dest"/>.</summary>
  public static V128 Tbx2(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1)
      => TblOrTbx(dest, vector, bytes, tb0, tb1);

  /// <summary>Byte table lookup over three table vectors; bytes with an out-of-range index keep the value from <paramref name="dest"/>.</summary>
  public static V128 Tbx3(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2)
      => TblOrTbx(dest, vector, bytes, tb0, tb1, tb2);

  /// <summary>Byte table lookup over four table vectors; bytes with an out-of-range index keep the value from <paramref name="dest"/>.</summary>
  public static V128 Tbx4(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
      => TblOrTbx(dest, vector, bytes, tb0, tb1, tb2, tb3);

  /// <summary>
  /// Shared implementation of the lookups above. The table vectors are concatenated in order
  /// into one byte table, and each of the first <paramref name="bytes"/> bytes of
  /// <paramref name="vector"/> is used as an index into it.
  /// </summary>
  /// <param name="dest">Initial contents of the result; <c>default</c> leaves the result zeroed.</param>
  /// <param name="vector">Vector holding the byte indices.</param>
  /// <param name="bytes">Number of result bytes to produce.</param>
  /// <param name="tb">Table vectors, 16 bytes each.</param>
  /// <returns>A 16-byte vector; bytes at or beyond <paramref name="bytes"/> are zero.</returns>
  private static V128 TblOrTbx(V128 dest, V128 vector, int bytes, params V128[] tb)
  {
      const int VectorBytes = 16;

      byte[] output = new byte[VectorBytes];

      if (dest != default)
      {
          // Seed the result so that out-of-range indices leave these bytes untouched.
          Array.Copy(dest.ToArray(), 0, output, 0, bytes);
      }

      byte[] lookup = new byte[tb.Length * VectorBytes];

      for (int reg = 0; reg < tb.Length; reg++)
      {
          Array.Copy(tb[reg].ToArray(), 0, lookup, reg * VectorBytes, VectorBytes);
      }

      byte[] indices = vector.ToArray();

      for (int pos = 0; pos < bytes; pos++)
      {
          int selector = indices[pos];

          if (selector < lookup.Length)
          {
              output[pos] = lookup[selector];
          }
      }

      return new V128(output);
  }
  288. #endregion
  289. #region "Crc32"
  // Polynomial constants passed to the bitwise update below, which shifts right and XORs the
  // polynomial in whenever the bit shifted out is 1.
  private const uint Crc32RevPoly = 0xEDB88320u;
  private const uint Crc32cRevPoly = 0x82F63B78u;

  /// <summary>Updates <paramref name="crc"/> with one byte using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32b(uint crc, byte value)
  {
      return Crc32(crc, Crc32RevPoly, value);
  }

  /// <summary>Updates <paramref name="crc"/> with a 16-bit value, low byte first, using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32h(uint crc, ushort value)
  {
      return Crc32h(crc, Crc32RevPoly, value);
  }

  /// <summary>Updates <paramref name="crc"/> with a 32-bit value, low byte first, using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32w(uint crc, uint value)
  {
      return Crc32w(crc, Crc32RevPoly, value);
  }

  /// <summary>Updates <paramref name="crc"/> with a 64-bit value, low byte first, using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32x(uint crc, ulong value)
  {
      return Crc32x(crc, Crc32RevPoly, value);
  }

  /// <summary>Updates <paramref name="crc"/> with one byte using <c>Crc32cRevPoly</c>.</summary>
  public static uint Crc32cb(uint crc, byte value)
  {
      return Crc32(crc, Crc32cRevPoly, value);
  }

  /// <summary>Updates <paramref name="crc"/> with a 16-bit value, low byte first, using <c>Crc32cRevPoly</c>.</summary>
  public static uint Crc32ch(uint crc, ushort value)
  {
      return Crc32h(crc, Crc32cRevPoly, value);
  }

  /// <summary>Updates <paramref name="crc"/> with a 32-bit value, low byte first, using <c>Crc32cRevPoly</c>.</summary>
  public static uint Crc32cw(uint crc, uint value)
  {
      return Crc32w(crc, Crc32cRevPoly, value);
  }

  /// <summary>Updates <paramref name="crc"/> with a 64-bit value, low byte first, using <c>Crc32cRevPoly</c>.</summary>
  public static uint Crc32cx(uint crc, ulong value)
  {
      return Crc32x(crc, Crc32cRevPoly, value);
  }

  // Feeds the two bytes of val into the CRC, least significant byte first.
  private static uint Crc32h(uint crc, uint poly, ushort val)
  {
      for (int shift = 0; shift < 16; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  // Feeds the four bytes of val into the CRC, least significant byte first.
  private static uint Crc32w(uint crc, uint poly, uint val)
  {
      for (int shift = 0; shift < 32; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  // Feeds the eight bytes of val into the CRC, least significant byte first.
  private static uint Crc32x(uint crc, uint poly, ulong val)
  {
      for (int shift = 0; shift < 64; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  // Bitwise CRC update for a single byte: XOR the byte into the low bits, then perform eight
  // shift-right steps, XOR-ing in poly whenever the bit shifted out was set.
  private static uint Crc32(uint crc, uint poly, byte val)
  {
      uint state = crc ^ val;

      for (int step = 0; step < 8; step++)
      {
          bool lowBitSet = (state & 1u) != 0u;

          state >>= 1;

          if (lowBitSet)
          {
              state ^= poly;
          }
      }

      return state;
  }
  336. #endregion
  337. #region "Aes"
  /// <summary>
  /// XORs <paramref name="value"/> with <paramref name="roundKey"/>, then applies
  /// <c>CryptoHelper.AesInvShiftRows</c> followed by <c>CryptoHelper.AesInvSubBytes</c>.
  /// </summary>
  public static V128 Decrypt(V128 value, V128 roundKey)
  {
      V128 keyed = value ^ roundKey;
      V128 shifted = CryptoHelper.AesInvShiftRows(keyed);

      return CryptoHelper.AesInvSubBytes(shifted);
  }
  /// <summary>
  /// XORs <paramref name="value"/> with <paramref name="roundKey"/>, then applies
  /// <c>CryptoHelper.AesShiftRows</c> followed by <c>CryptoHelper.AesSubBytes</c>.
  /// </summary>
  public static V128 Encrypt(V128 value, V128 roundKey)
  {
      V128 keyed = value ^ roundKey;
      V128 shifted = CryptoHelper.AesShiftRows(keyed);

      return CryptoHelper.AesSubBytes(shifted);
  }
  /// <summary>Forwards to <c>CryptoHelper.AesInvMixColumns</c>.</summary>
  public static V128 InverseMixColumns(V128 value) => CryptoHelper.AesInvMixColumns(value);
  /// <summary>Forwards to <c>CryptoHelper.AesMixColumns</c>.</summary>
  public static V128 MixColumns(V128 value) => CryptoHelper.AesMixColumns(value);
  354. #endregion
  355. #region "Sha1"
  /// <summary>
  /// Runs four hash rounds that use <c>ShaChoose</c> as the mixing function, consuming one
  /// 32-bit element of <paramref name="wk"/> per round.
  /// </summary>
  /// <param name="hash_abcd">Four 32-bit state words.</param>
  /// <param name="hash_e">Fifth 32-bit state word.</param>
  /// <param name="wk">Four 32-bit words added into the state, one per round.</param>
  /// <returns>The updated <paramref name="hash_abcd"/>.</returns>
  public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk)
  {
      int round = 0;

      while (round < 4)
      {
          uint second = hash_abcd.Extract<uint>(1);

          uint mixed = ShaChoose(second,
                                 hash_abcd.Extract<uint>(2),
                                 hash_abcd.Extract<uint>(3));

          hash_e += hash_abcd.Extract<uint>(0).Rol(5) + mixed + wk.Extract<uint>(round);

          hash_abcd.Insert(1, second.Rol(30));

          // Shift the five state words along by one position for the next round.
          Rol32_160(ref hash_e, ref hash_abcd);

          round++;
      }

      return hash_abcd;
  }
  /// <summary>Rotates <paramref name="hash_e"/> left by 30 bits.</summary>
  public static uint FixedRotate(uint hash_e) => Rol(hash_e, 30);
  /// <summary>
  /// Runs four hash rounds that use <c>ShaMajority</c> as the mixing function, consuming one
  /// 32-bit element of <paramref name="wk"/> per round.
  /// </summary>
  /// <param name="hash_abcd">Four 32-bit state words.</param>
  /// <param name="hash_e">Fifth 32-bit state word.</param>
  /// <param name="wk">Four 32-bit words added into the state, one per round.</param>
  /// <returns>The updated <paramref name="hash_abcd"/>.</returns>
  public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk)
  {
      int round = 0;

      while (round < 4)
      {
          uint second = hash_abcd.Extract<uint>(1);

          uint mixed = ShaMajority(second,
                                   hash_abcd.Extract<uint>(2),
                                   hash_abcd.Extract<uint>(3));

          hash_e += hash_abcd.Extract<uint>(0).Rol(5) + mixed + wk.Extract<uint>(round);

          hash_abcd.Insert(1, second.Rol(30));

          // Shift the five state words along by one position for the next round.
          Rol32_160(ref hash_e, ref hash_abcd);

          round++;
      }

      return hash_abcd;
  }
  /// <summary>
  /// Runs four hash rounds that use <c>ShaParity</c> as the mixing function, consuming one
  /// 32-bit element of <paramref name="wk"/> per round.
  /// </summary>
  /// <param name="hash_abcd">Four 32-bit state words.</param>
  /// <param name="hash_e">Fifth 32-bit state word.</param>
  /// <param name="wk">Four 32-bit words added into the state, one per round.</param>
  /// <returns>The updated <paramref name="hash_abcd"/>.</returns>
  public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk)
  {
      int round = 0;

      while (round < 4)
      {
          uint second = hash_abcd.Extract<uint>(1);

          uint mixed = ShaParity(second,
                                 hash_abcd.Extract<uint>(2),
                                 hash_abcd.Extract<uint>(3));

          hash_e += hash_abcd.Extract<uint>(0).Rol(5) + mixed + wk.Extract<uint>(round);

          hash_abcd.Insert(1, second.Rol(30));

          // Shift the five state words along by one position for the next round.
          Rol32_160(ref hash_e, ref hash_abcd);

          round++;
      }

      return hash_abcd;
  }
  /// <summary>
  /// Builds a vector from 64-bit element 1 of <paramref name="w0_3"/> and 64-bit element 0 of
  /// <paramref name="w4_7"/>, then XORs it with <paramref name="w0_3"/> and <paramref name="w8_11"/>.
  /// </summary>
  public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11)
  {
      V128 spliced = new V128(w0_3.Extract<ulong>(1), w4_7.Extract<ulong>(0));
      V128 mask = w0_3 ^ w8_11;

      return spliced ^ mask;
  }
  /// <summary>
  /// XORs <paramref name="tw0_3"/> with <paramref name="w12_15"/> shifted right by 32 bits and
  /// rotates every 32-bit element of the result left by one. The last element is additionally
  /// XORed with the first element rotated left by two.
  /// </summary>
  public static V128 Sha1SchedulePart2(V128 tw0_3, V128 w12_15)
  {
      V128 mixed = tw0_3 ^ (w12_15 >> 32);

      uint first = mixed.Extract<uint>(0);
      uint second = mixed.Extract<uint>(1);
      uint third = mixed.Extract<uint>(2);
      uint fourth = mixed.Extract<uint>(3);

      uint r0 = Rol(first, 1);
      uint r1 = Rol(second, 1);
      uint r2 = Rol(third, 1);
      uint r3 = Rol(fourth, 1) ^ Rol(first, 2);

      return new V128(r0, r1, r2, r3);
  }
  // Moves every 32-bit word one position up across the pair (y, x): x is shifted left by
  // 32 bits, the old y becomes element 0 of x, and the old element 3 of x becomes y.
  private static void Rol32_160(ref uint y, ref V128 x)
  {
      uint displaced = x.Extract<uint>(3);

      x <<= 32;
      x.Insert(0, y);

      y = displaced;
  }
  // Bitwise select: each result bit comes from y where x has a 1 and from z where x has a 0.
  private static uint ShaChoose(uint x, uint y, uint z) => (x & y) | (~x & z);
  // Bitwise majority vote: a result bit is 1 when at least two of the three inputs have it set.
  private static uint ShaMajority(uint x, uint y, uint z) => (x & y) | (x & z) | (y & z);
  // Bitwise parity: XOR of the three inputs.
  private static uint ShaParity(uint x, uint y, uint z) => (x ^ y) ^ z;
  // Rotates a 32-bit value left by count bits; the bits shifted out at the top re-enter at the bottom.
  private static uint Rol(this uint value, int count)
      => (value >> (32 - count)) | (value << count);
  441. #endregion
  442. #region "Sha256"
  /// <summary>Runs <c>Sha256Hash</c> with <c>part1</c> set to true and returns its result.</summary>
  public static V128 HashLower(V128 hash_abcd, V128 hash_efgh, V128 wk)
      => Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
  /// <summary>Runs <c>Sha256Hash</c> with <c>part1</c> set to false and returns its result.</summary>
  public static V128 HashUpper(V128 hash_abcd, V128 hash_efgh, V128 wk)
      => Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
  /// <summary>
  /// For each of the four 32-bit lanes, takes the next word in sequence (elements 1..3 of
  /// <paramref name="w0_3"/>, then element 0 of <paramref name="w4_7"/>), applies
  /// ROR 7 ^ ROR 18 ^ LSR 3 to it, and adds the lane's own word from <paramref name="w0_3"/>.
  /// </summary>
  public static V128 Sha256SchedulePart1(V128 w0_3, V128 w4_7)
  {
      V128 result = new V128();

      for (int lane = 0; lane < 4; lane++)
      {
          uint next = lane <= 2
              ? w0_3.Extract<uint>(lane + 1)
              : w4_7.Extract<uint>(0);

          uint sigma = next.Ror(7) ^ next.Ror(18) ^ next.Lsr(3);

          result.Insert(lane, sigma + w0_3.Extract<uint>(lane));
      }

      return result;
  }
  /// <summary>
  /// Produces four 32-bit lanes in two passes. Each lane is ROR 17 ^ ROR 19 ^ LSR 10 of a source
  /// word, plus the lane's word from <paramref name="w0_3"/>, plus a word taken from
  /// <paramref name="w8_11"/> (lanes 0..2) or <paramref name="w12_15"/> (lane 3).
  /// Lanes 0 and 1 take their source words from 64-bit element 1 of <paramref name="w12_15"/>;
  /// lanes 2 and 3 take them from the freshly computed lanes 0 and 1.
  /// </summary>
  public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15)
  {
      V128 result = new V128();

      // First pass: lanes 0 and 1.
      ulong source = w12_15.Extract<ulong>(1);

      for (int lane = 0; lane < 2; lane++)
      {
          uint word = source.ULongPart(lane);
          uint sigma = word.Ror(17) ^ word.Ror(19) ^ word.Lsr(10);

          sigma += w0_3.Extract<uint>(lane) + w8_11.Extract<uint>(lane + 1);

          result.Insert(lane, sigma);
      }

      // Second pass: lanes 2 and 3 depend on the two lanes computed above.
      source = result.Extract<ulong>(0);

      for (int lane = 2; lane < 4; lane++)
      {
          uint word = source.ULongPart(lane - 2);
          uint sigma = word.Ror(17) ^ word.Ror(19) ^ word.Lsr(10);

          uint extra = lane == 2
              ? w8_11.Extract<uint>(3)
              : w12_15.Extract<uint>(0);

          sigma += w0_3.Extract<uint>(lane) + extra;

          result.Insert(lane, sigma);
      }

      return result;
  }
  // Runs four rounds over the two state vectors x and y, consuming one 32-bit element of w per
  // round. Returns x when part1 is true, otherwise y.
  private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1)
  {
      int round = 0;

      while (round < 4)
      {
          uint y0 = y.Extract<uint>(0);
          uint x0 = x.Extract<uint>(0);

          uint chosen = ShaChoose(y0, y.Extract<uint>(1), y.Extract<uint>(2));
          uint majority = ShaMajority(x0, x.Extract<uint>(1), x.Extract<uint>(2));

          uint t1 = y.Extract<uint>(3) + ShaHashSigma1(y0) + chosen + w.Extract<uint>(round);

          // Element 3 of each vector is replaced before the words are shifted along.
          x.Insert(3, t1 + x.Extract<uint>(3));
          y.Insert(3, t1 + ShaHashSigma0(x0) + majority);

          Rol32_256(ref y, ref x);

          round++;
      }

      if (part1)
      {
          return x;
      }

      return y;
  }
  // Shifts both vectors left by 32 bits and cross-feeds the words that fall off the top:
  // the old element 3 of x becomes element 0 of y, and the old element 3 of y becomes element 0 of x.
  private static void Rol32_256(ref V128 y, ref V128 x)
  {
      uint topOfX = x.Extract<uint>(3);
      uint topOfY = y.Extract<uint>(3);

      x <<= 32;
      y <<= 32;

      x.Insert(0, topOfY);
      y.Insert(0, topOfX);
  }
  // XOR of x rotated right by 2, 13 and 22 bits.
  private static uint ShaHashSigma0(uint x) => Ror(x, 2) ^ Ror(x, 13) ^ Ror(x, 22);
  // XOR of x rotated right by 6, 11 and 25 bits.
  private static uint ShaHashSigma1(uint x) => Ror(x, 6) ^ Ror(x, 11) ^ Ror(x, 25);
  // Rotates a 32-bit value right by count bits; the bits shifted out at the bottom re-enter at the top.
  private static uint Ror(this uint value, int count)
      => (value << (32 - count)) | (value >> count);
  // Logical (zero-filling) shift right of a 32-bit value.
  private static uint Lsr(this uint value, int count) => value >> count;
  // Returns the low 32 bits of value when part is 0, and the high 32 bits for any other part.
  private static uint ULongPart(this ulong value, int part)
  {
      if (part == 0)
      {
          return (uint)(value & uint.MaxValue);
      }

      return (uint)(value >> 32);
  }
  534. #endregion
  /// <summary>
  /// Carry-less multiplication of two 64-bit operands: for every set bit i of
  /// <paramref name="op1"/>, <paramref name="op2"/> shifted left by i is XORed into the result.
  /// </summary>
  /// <returns>The accumulated 128-bit vector.</returns>
  public static V128 PolynomialMult64_128(ulong op1, ulong op2)
  {
      V128 product = V128.Zero;
      V128 multiplicand = new V128(op2, 0);

      for (int bit = 0; bit < 64; bit++)
      {
          if ((op1 & (1UL << bit)) == 0UL)
          {
              continue;
          }

          product ^= multiplicand << bit;
      }

      return product;
  }
  548. }
  549. }