SoftFallback.cs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624
  1. using ARMeilleure.State;
  2. using System;
  3. namespace ARMeilleure.Instructions
  4. {
  5. static class SoftFallback
  6. {
  7. #region "ShrImm64"
  /// <summary>
  /// Arithmetic right shift of a signed 64-bit value by an immediate, with optional rounding.
  /// </summary>
  /// <param name="value">Value to shift.</param>
  /// <param name="roundConst">Zero for a truncating shift; otherwise the rounding constant added before shifting
  /// (the original comments state it is <c>1L &lt;&lt; (shift - 1)</c>).</param>
  /// <param name="shift">Shift amount; anything above 63 is handled as the full-width (64) case.</param>
  /// <returns>The shifted (and, when requested, rounded) value.</returns>
  public static long SignedShrImm64(long value, long roundConst, int shift)
  {
      bool fullWidth = shift > 63;

      if (roundConst == 0L)
      {
          if (fullWidth)
          {
              // Every bit is shifted out: only the sign survives.
              return value < 0L ? -1L : 0L;
          }

          return value >> shift;
      }

      if (fullWidth)
      {
          return 0L;
      }

      long rounded = value + roundConst;

      // True when a non-negative value wrapped to a negative sum, i.e. the addition overflowed.
      bool overflowed = (~value & (value ^ rounded)) < 0L;

      // After an overflow the sum must be shifted as an unsigned quantity to keep the carried bit.
      return overflowed ? (long)((ulong)rounded >> shift) : rounded >> shift;
  }
  /// <summary>
  /// Logical right shift of an unsigned 64-bit value by an immediate, with optional rounding.
  /// </summary>
  /// <param name="value">Value to shift.</param>
  /// <param name="roundConst">Zero for a truncating shift; otherwise the rounding constant added before shifting
  /// (the original comments state it is <c>1L &lt;&lt; (shift - 1)</c>).</param>
  /// <param name="shift">Shift amount; anything above 63 is handled as the full-width (64) case.</param>
  /// <returns>The shifted (and, when requested, rounded) value.</returns>
  public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
  {
      bool fullWidth = shift > 63;

      if (roundConst == 0L)
      {
          return fullWidth ? 0UL : value >> shift;
      }

      ulong bias = (ulong)roundConst;
      ulong sum = value + bias;

      // The 64-bit addition wrapped around, so a 65th bit has to be accounted for.
      bool carried = sum < value && sum < bias;

      if (fullWidth)
      {
          // Only the carry bit (if any) remains after shifting out all 64 bits.
          return carried ? 1UL : 0UL;
      }

      ulong shifted = sum >> shift;

      // Re-insert the carried bit at the position it lands on after the shift.
      return carried ? shifted | (0x8000000000000000UL >> (shift - 1)) : shifted;
  }
  88. #endregion
  89. #region "Saturation"
  /// <summary>
  /// Converts a single-precision value to a signed 32-bit integer with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>0 for NaN, the nearest range limit for out-of-range input, otherwise the truncated value.</returns>
  public static int SatF32ToS32(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= int.MaxValue)
      {
          return int.MaxValue;
      }

      if (value <= int.MinValue)
      {
          return int.MinValue;
      }

      return (int)value;
  }
  /// <summary>
  /// Converts a single-precision value to a signed 64-bit integer with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>0 for NaN, the nearest range limit for out-of-range input, otherwise the truncated value.</returns>
  public static long SatF32ToS64(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= long.MaxValue)
      {
          return long.MaxValue;
      }

      if (value <= long.MinValue)
      {
          return long.MinValue;
      }

      return (long)value;
  }
  /// <summary>
  /// Converts a single-precision value to an unsigned 32-bit integer with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>0 for NaN and for non-positive input, <see cref="uint.MaxValue"/> for input at or above the
  /// upper limit, otherwise the truncated value.</returns>
  public static uint SatF32ToU32(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= uint.MaxValue)
      {
          return uint.MaxValue;
      }

      if (value <= uint.MinValue)
      {
          return uint.MinValue;
      }

      return (uint)value;
  }
  /// <summary>
  /// Converts a single-precision value to an unsigned 64-bit integer with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>0 for NaN and for non-positive input, <see cref="ulong.MaxValue"/> for input at or above the
  /// upper limit, otherwise the truncated value.</returns>
  public static ulong SatF32ToU64(float value)
  {
      if (float.IsNaN(value))
      {
          return 0;
      }

      if (value >= ulong.MaxValue)
      {
          return ulong.MaxValue;
      }

      if (value <= ulong.MinValue)
      {
          return ulong.MinValue;
      }

      return (ulong)value;
  }
  /// <summary>
  /// Converts a double-precision value to a signed 32-bit integer with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>0 for NaN, the nearest range limit for out-of-range input, otherwise the truncated value.</returns>
  public static int SatF64ToS32(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= int.MaxValue)
      {
          return int.MaxValue;
      }

      if (value <= int.MinValue)
      {
          return int.MinValue;
      }

      return (int)value;
  }
  /// <summary>
  /// Converts a double-precision value to a signed 64-bit integer with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>0 for NaN, the nearest range limit for out-of-range input, otherwise the truncated value.</returns>
  public static long SatF64ToS64(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= long.MaxValue)
      {
          return long.MaxValue;
      }

      if (value <= long.MinValue)
      {
          return long.MinValue;
      }

      return (long)value;
  }
  /// <summary>
  /// Converts a double-precision value to an unsigned 32-bit integer with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>0 for NaN and for non-positive input, <see cref="uint.MaxValue"/> for input at or above the
  /// upper limit, otherwise the truncated value.</returns>
  public static uint SatF64ToU32(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= uint.MaxValue)
      {
          return uint.MaxValue;
      }

      if (value <= uint.MinValue)
      {
          return uint.MinValue;
      }

      return (uint)value;
  }
  /// <summary>
  /// Converts a double-precision value to an unsigned 64-bit integer with saturation.
  /// </summary>
  /// <param name="value">Value to convert.</param>
  /// <returns>0 for NaN and for non-positive input, <see cref="ulong.MaxValue"/> for input at or above the
  /// upper limit, otherwise the truncated value.</returns>
  public static ulong SatF64ToU64(double value)
  {
      if (double.IsNaN(value))
      {
          return 0;
      }

      if (value >= ulong.MaxValue)
      {
          return ulong.MaxValue;
      }

      if (value <= ulong.MinValue)
      {
          return ulong.MinValue;
      }

      return (ulong)value;
  }
  138. #endregion
  139. #region "Count"
  /// <summary>
  /// Counts how many bits directly below the top bit of a <paramref name="size"/>-bit value are equal to that top bit.
  /// </summary>
  /// <param name="value">Value to inspect; presumably zero-extended to 64 bits by the caller — TODO confirm.</param>
  /// <param name="size">Operand width in bits: 8, 16, 32 or 64 (SIMD&amp;FP or Base Inst.).</param>
  /// <returns>The count, between 0 and <c>size - 1</c>.</returns>
  public static ulong CountLeadingSigns(ulong value, int size)
  {
      // A set bit marks a position whose value differs from the bit directly above it.
      ulong transitions = value ^ (value >> 1);
      int top = size - 2;

      int run = 0;
      while (run <= top)
      {
          if (((transitions >> (top - run)) & 1UL) != 0UL)
          {
              return (ulong)run;
          }

          run++;
      }

      // No transition found: every bit below the top one matches it.
      return (ulong)(size - 1);
  }
  // Number of leading zero bits in each possible 4-bit value (index = nibble value).
  private static ReadOnlySpan<byte> ClzNibbleTbl => new byte[] { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };

  /// <summary>
  /// Counts the leading zero bits of a <paramref name="size"/>-bit value, one nibble at a time.
  /// </summary>
  /// <param name="value">Value to inspect.</param>
  /// <param name="size">Operand width in bits: 8, 16, 32 or 64 (SIMD&amp;FP or Base Inst.).</param>
  /// <returns><paramref name="size"/> when the value is zero, otherwise the number of leading zero bits.</returns>
  public static ulong CountLeadingZeros(ulong value, int size)
  {
      if (value == 0ul)
      {
          return (ulong)size;
      }

      int total = 0;

      // Walk the nibbles from the most significant one down; stop at the first nibble that is not all zero.
      for (int offset = size - 4; ; offset -= 4)
      {
          int zeros = ClzNibbleTbl[(int)(value >> offset) & 0b1111];
          total += zeros;

          if (zeros != 4)
          {
              return (ulong)total;
          }
      }
  }
  171. #endregion
  172. #region "Table"
  /// <summary>
  /// Table lookup over one table register; forwards to <c>TblOrTbx</c> with a default (zero) destination.
  /// </summary>
  /// <param name="vector">Vector whose bytes are used as table indices.</param>
  /// <param name="bytes">Number of result bytes to produce.</param>
  /// <param name="tb0">Table register 0.</param>
  /// <returns>The looked-up vector.</returns>
  public static V128 Tbl1(V128 vector, int bytes, V128 tb0) =>
      TblOrTbx(default, vector, bytes, tb0);
  /// <summary>
  /// Table lookup over two table registers; forwards to <c>TblOrTbx</c> with a default (zero) destination.
  /// </summary>
  /// <param name="vector">Vector whose bytes are used as table indices.</param>
  /// <param name="bytes">Number of result bytes to produce.</param>
  /// <param name="tb0">Table register 0.</param>
  /// <param name="tb1">Table register 1.</param>
  /// <returns>The looked-up vector.</returns>
  public static V128 Tbl2(V128 vector, int bytes, V128 tb0, V128 tb1) =>
      TblOrTbx(default, vector, bytes, tb0, tb1);
  /// <summary>
  /// Table lookup over three table registers; forwards to <c>TblOrTbx</c> with a default (zero) destination.
  /// </summary>
  /// <param name="vector">Vector whose bytes are used as table indices.</param>
  /// <param name="bytes">Number of result bytes to produce.</param>
  /// <param name="tb0">Table register 0.</param>
  /// <param name="tb1">Table register 1.</param>
  /// <param name="tb2">Table register 2.</param>
  /// <returns>The looked-up vector.</returns>
  public static V128 Tbl3(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2) =>
      TblOrTbx(default, vector, bytes, tb0, tb1, tb2);
  /// <summary>
  /// Table lookup over four table registers; forwards to <c>TblOrTbx</c> with a default (zero) destination.
  /// </summary>
  /// <param name="vector">Vector whose bytes are used as table indices.</param>
  /// <param name="bytes">Number of result bytes to produce.</param>
  /// <param name="tb0">Table register 0.</param>
  /// <param name="tb1">Table register 1.</param>
  /// <param name="tb2">Table register 2.</param>
  /// <param name="tb3">Table register 3.</param>
  /// <returns>The looked-up vector.</returns>
  public static V128 Tbl4(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3) =>
      TblOrTbx(default, vector, bytes, tb0, tb1, tb2, tb3);
  /// <summary>
  /// Table lookup over one table register that starts from <paramref name="dest"/>; forwards to <c>TblOrTbx</c>.
  /// </summary>
  /// <param name="dest">Existing destination contents.</param>
  /// <param name="vector">Vector whose bytes are used as table indices.</param>
  /// <param name="bytes">Number of result bytes to produce.</param>
  /// <param name="tb0">Table register 0.</param>
  /// <returns>The looked-up vector.</returns>
  public static V128 Tbx1(V128 dest, V128 vector, int bytes, V128 tb0) =>
      TblOrTbx(dest, vector, bytes, tb0);
  /// <summary>
  /// Table lookup over two table registers that starts from <paramref name="dest"/>; forwards to <c>TblOrTbx</c>.
  /// </summary>
  /// <param name="dest">Existing destination contents.</param>
  /// <param name="vector">Vector whose bytes are used as table indices.</param>
  /// <param name="bytes">Number of result bytes to produce.</param>
  /// <param name="tb0">Table register 0.</param>
  /// <param name="tb1">Table register 1.</param>
  /// <returns>The looked-up vector.</returns>
  public static V128 Tbx2(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1) =>
      TblOrTbx(dest, vector, bytes, tb0, tb1);
  /// <summary>
  /// Table lookup over three table registers that starts from <paramref name="dest"/>; forwards to <c>TblOrTbx</c>.
  /// </summary>
  /// <param name="dest">Existing destination contents.</param>
  /// <param name="vector">Vector whose bytes are used as table indices.</param>
  /// <param name="bytes">Number of result bytes to produce.</param>
  /// <param name="tb0">Table register 0.</param>
  /// <param name="tb1">Table register 1.</param>
  /// <param name="tb2">Table register 2.</param>
  /// <returns>The looked-up vector.</returns>
  public static V128 Tbx3(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2) =>
      TblOrTbx(dest, vector, bytes, tb0, tb1, tb2);
  /// <summary>
  /// Table lookup over four table registers that starts from <paramref name="dest"/>; forwards to <c>TblOrTbx</c>.
  /// </summary>
  /// <param name="dest">Existing destination contents.</param>
  /// <param name="vector">Vector whose bytes are used as table indices.</param>
  /// <param name="bytes">Number of result bytes to produce.</param>
  /// <param name="tb0">Table register 0.</param>
  /// <param name="tb1">Table register 1.</param>
  /// <param name="tb2">Table register 2.</param>
  /// <param name="tb3">Table register 3.</param>
  /// <returns>The looked-up vector.</returns>
  public static V128 Tbx4(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3) =>
      TblOrTbx(dest, vector, bytes, tb0, tb1, tb2, tb3);
  /// <summary>
  /// Shared implementation of the Tbl and Tbx helpers: each of the first <paramref name="bytes"/> bytes of
  /// <paramref name="vector"/> selects a byte from the concatenated table registers.
  /// </summary>
  /// <param name="dest">Initial result contents; left as all zeroes when it equals <c>default</c>.</param>
  /// <param name="vector">Vector of byte indices into the table.</param>
  /// <param name="bytes">Number of result bytes to process.</param>
  /// <param name="tb">Table registers, concatenated in order, 16 bytes each.</param>
  /// <returns>The result vector; bytes whose index is outside the table keep their initial value.</returns>
  private static V128 TblOrTbx(V128 dest, V128 vector, int bytes, params V128[] tb)
  {
      // Assumes V128.ToArray() yields the register's 16 bytes — TODO confirm against V128.
      const int RegisterSize = 16;

      byte[] output = new byte[RegisterSize];

      if (dest != default)
      {
          Buffer.BlockCopy(dest.ToArray(), 0, output, 0, bytes);
      }

      // Flatten the table registers into one contiguous lookup array.
      byte[] lookup = new byte[tb.Length * RegisterSize];
      int offset = 0;

      foreach (V128 register in tb)
      {
          Buffer.BlockCopy(register.ToArray(), 0, lookup, offset, RegisterSize);
          offset += RegisterSize;
      }

      byte[] selectors = vector.ToArray();

      for (int lane = 0; lane < bytes; lane++)
      {
          int selector = selectors[lane];

          // Out-of-range selectors leave the output byte untouched.
          if (selector < lookup.Length)
          {
              output[lane] = lookup[selector];
          }
      }

      return new V128(output);
  }
  228. #endregion
  229. #region "Crc32"
  // Bit-reversed generator polynomials used by the two CRC families below.
  private const uint Crc32RevPoly = 0xedb88320;
  private const uint Crc32cRevPoly = 0x82f63b78;

  /// <summary>Folds one byte into <paramref name="crc"/> using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32b(uint crc, byte value)
  {
      return Crc32(crc, Crc32RevPoly, value);
  }

  /// <summary>Folds a 16-bit value into <paramref name="crc"/>, low byte first, using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32h(uint crc, ushort value)
  {
      return Crc32h(crc, Crc32RevPoly, value);
  }

  /// <summary>Folds a 32-bit value into <paramref name="crc"/>, low byte first, using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32w(uint crc, uint value)
  {
      return Crc32w(crc, Crc32RevPoly, value);
  }

  /// <summary>Folds a 64-bit value into <paramref name="crc"/>, low byte first, using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32x(uint crc, ulong value)
  {
      return Crc32x(crc, Crc32RevPoly, value);
  }

  /// <summary>Folds one byte into <paramref name="crc"/> using <c>Crc32cRevPoly</c>.</summary>
  public static uint Crc32cb(uint crc, byte value)
  {
      return Crc32(crc, Crc32cRevPoly, value);
  }

  /// <summary>Folds a 16-bit value into <paramref name="crc"/>, low byte first, using <c>Crc32cRevPoly</c>.</summary>
  public static uint Crc32ch(uint crc, ushort value)
  {
      return Crc32h(crc, Crc32cRevPoly, value);
  }

  /// <summary>Folds a 32-bit value into <paramref name="crc"/>, low byte first, using <c>Crc32cRevPoly</c>.</summary>
  public static uint Crc32cw(uint crc, uint value)
  {
      return Crc32w(crc, Crc32cRevPoly, value);
  }

  /// <summary>Folds a 64-bit value into <paramref name="crc"/>, low byte first, using <c>Crc32cRevPoly</c>.</summary>
  public static uint Crc32cx(uint crc, ulong value)
  {
      return Crc32x(crc, Crc32cRevPoly, value);
  }

  // Processes the two bytes of val, least significant first.
  private static uint Crc32h(uint crc, uint poly, ushort val)
  {
      for (int shift = 0; shift < 16; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  // Processes the four bytes of val, least significant first.
  private static uint Crc32w(uint crc, uint poly, uint val)
  {
      for (int shift = 0; shift < 32; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  // Processes the eight bytes of val, least significant first.
  private static uint Crc32x(uint crc, uint poly, ulong val)
  {
      for (int shift = 0; shift < 64; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  // Bitwise CRC step for a single byte: eight shift-right rounds, XOR-ing in the polynomial
  // whenever the bit shifted out was set.
  private static uint Crc32(uint crc, uint poly, byte val)
  {
      uint state = crc ^ val;

      for (int round = 0; round < 8; round++)
      {
          bool lowBitSet = (state & 1u) != 0u;

          state >>= 1;

          if (lowBitSet)
          {
              state ^= poly;
          }
      }

      return state;
  }
  276. #endregion
  277. #region "Aes"
  /// <summary>
  /// XORs <paramref name="value"/> with <paramref name="roundKey"/>, then applies
  /// <c>CryptoHelper.AesInvShiftRows</c> followed by <c>CryptoHelper.AesInvSubBytes</c>.
  /// </summary>
  /// <param name="value">Input state.</param>
  /// <param name="roundKey">Round key.</param>
  /// <returns>The transformed state.</returns>
  public static V128 Decrypt(V128 value, V128 roundKey)
  {
      V128 keyed = value ^ roundKey;
      V128 shifted = CryptoHelper.AesInvShiftRows(keyed);

      return CryptoHelper.AesInvSubBytes(shifted);
  }
  /// <summary>
  /// XORs <paramref name="value"/> with <paramref name="roundKey"/>, then applies
  /// <c>CryptoHelper.AesShiftRows</c> followed by <c>CryptoHelper.AesSubBytes</c>.
  /// </summary>
  /// <param name="value">Input state.</param>
  /// <param name="roundKey">Round key.</param>
  /// <returns>The transformed state.</returns>
  public static V128 Encrypt(V128 value, V128 roundKey)
  {
      V128 keyed = value ^ roundKey;
      V128 shifted = CryptoHelper.AesShiftRows(keyed);

      return CryptoHelper.AesSubBytes(shifted);
  }
  /// <summary>
  /// Forwards to <c>CryptoHelper.AesInvMixColumns</c>.
  /// </summary>
  /// <param name="value">Input state.</param>
  /// <returns>The value returned by <c>CryptoHelper.AesInvMixColumns</c>.</returns>
  public static V128 InverseMixColumns(V128 value) => CryptoHelper.AesInvMixColumns(value);
  /// <summary>
  /// Forwards to <c>CryptoHelper.AesMixColumns</c>.
  /// </summary>
  /// <param name="value">Input state.</param>
  /// <returns>The value returned by <c>CryptoHelper.AesMixColumns</c>.</returns>
  public static V128 MixColumns(V128 value) => CryptoHelper.AesMixColumns(value);
  294. #endregion
  295. #region "Sha1"
  /// <summary>
  /// Runs four SHA-1 style rounds over the state using the "choose" function.
  /// </summary>
  /// <param name="hash_abcd">Four 32-bit state words (elements 0..3).</param>
  /// <param name="hash_e">Fifth 32-bit state word.</param>
  /// <param name="wk">Four 32-bit words, one consumed per round.</param>
  /// <returns>The updated four-word state.</returns>
  public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk)
  {
      int round = 0;

      while (round < 4)
      {
          uint a = hash_abcd.Extract<uint>(0);
          uint b = hash_abcd.Extract<uint>(1);
          uint c = hash_abcd.Extract<uint>(2);
          uint d = hash_abcd.Extract<uint>(3);

          hash_e += a.Rol(5) + ShaChoose(b, c, d) + wk.Extract<uint>(round);
          hash_abcd.Insert(1, b.Rol(30));

          // Rotate the 160-bit (abcd, e) state by one 32-bit word.
          Rol32_160(ref hash_e, ref hash_abcd);

          round++;
      }

      return hash_abcd;
  }
  /// <summary>
  /// Rotates <paramref name="hash_e"/> left by 30 bits.
  /// </summary>
  /// <param name="hash_e">Word to rotate.</param>
  /// <returns>The rotated word.</returns>
  public static uint FixedRotate(uint hash_e) => Rol(hash_e, 30);
  /// <summary>
  /// Runs four SHA-1 style rounds over the state using the "majority" function.
  /// </summary>
  /// <param name="hash_abcd">Four 32-bit state words (elements 0..3).</param>
  /// <param name="hash_e">Fifth 32-bit state word.</param>
  /// <param name="wk">Four 32-bit words, one consumed per round.</param>
  /// <returns>The updated four-word state.</returns>
  public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk)
  {
      int round = 0;

      while (round < 4)
      {
          uint a = hash_abcd.Extract<uint>(0);
          uint b = hash_abcd.Extract<uint>(1);
          uint c = hash_abcd.Extract<uint>(2);
          uint d = hash_abcd.Extract<uint>(3);

          hash_e += a.Rol(5) + ShaMajority(b, c, d) + wk.Extract<uint>(round);
          hash_abcd.Insert(1, b.Rol(30));

          // Rotate the 160-bit (abcd, e) state by one 32-bit word.
          Rol32_160(ref hash_e, ref hash_abcd);

          round++;
      }

      return hash_abcd;
  }
  /// <summary>
  /// Runs four SHA-1 style rounds over the state using the "parity" function.
  /// </summary>
  /// <param name="hash_abcd">Four 32-bit state words (elements 0..3).</param>
  /// <param name="hash_e">Fifth 32-bit state word.</param>
  /// <param name="wk">Four 32-bit words, one consumed per round.</param>
  /// <returns>The updated four-word state.</returns>
  public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk)
  {
      int round = 0;

      while (round < 4)
      {
          uint a = hash_abcd.Extract<uint>(0);
          uint b = hash_abcd.Extract<uint>(1);
          uint c = hash_abcd.Extract<uint>(2);
          uint d = hash_abcd.Extract<uint>(3);

          hash_e += a.Rol(5) + ShaParity(b, c, d) + wk.Extract<uint>(round);
          hash_abcd.Insert(1, b.Rol(30));

          // Rotate the 160-bit (abcd, e) state by one 32-bit word.
          Rol32_160(ref hash_e, ref hash_abcd);

          round++;
      }

      return hash_abcd;
  }
  /// <summary>
  /// First half of the SHA-1 schedule update: builds a vector from 64-bit element 1 of
  /// <paramref name="w0_3"/> and 64-bit element 0 of <paramref name="w4_7"/>, then XORs it with
  /// <paramref name="w0_3"/> and <paramref name="w8_11"/>.
  /// </summary>
  /// <param name="w0_3">Schedule words 0..3.</param>
  /// <param name="w4_7">Schedule words 4..7.</param>
  /// <param name="w8_11">Schedule words 8..11.</param>
  /// <returns>The combined vector.</returns>
  public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11)
  {
      V128 spliced = new V128(w0_3.Extract<ulong>(1), w4_7.Extract<ulong>(0));
      V128 mixed = w0_3 ^ w8_11;

      return spliced ^ mixed;
  }
  /// <summary>
  /// Second half of the SHA-1 schedule update: XORs <paramref name="tw0_3"/> with
  /// <paramref name="w12_15"/> shifted right by 32, then rotates every 32-bit element left by one.
  /// The last element is additionally XOR-ed with the first element rotated left by two.
  /// </summary>
  /// <param name="tw0_3">Intermediate vector; presumably the output of Sha1SchedulePart1 — TODO confirm.</param>
  /// <param name="w12_15">Schedule words 12..15.</param>
  /// <returns>The four new schedule words.</returns>
  public static V128 Sha1SchedulePart2(V128 tw0_3, V128 w12_15)
  {
      V128 mixed = tw0_3 ^ (w12_15 >> 32);

      uint m0 = mixed.Extract<uint>(0);
      uint m1 = mixed.Extract<uint>(1);
      uint m2 = mixed.Extract<uint>(2);
      uint m3 = mixed.Extract<uint>(3);

      uint r0 = m0.Rol(1);
      uint r1 = m1.Rol(1);
      uint r2 = m2.Rol(1);
      uint r3 = m3.Rol(1) ^ m0.Rol(2);

      return new V128(r0, r1, r2, r3);
  }
  /// <summary>
  /// Rotates the combined (x, y) state by one 32-bit word: x is shifted left by 32 with the old y
  /// inserted as element 0, and y receives the old element 3 of x.
  /// </summary>
  /// <param name="y">Single-word part of the state; replaced by the word shifted out of x.</param>
  /// <param name="x">Four-word part of the state; updated in place.</param>
  private static void Rol32_160(ref uint y, ref V128 x)
  {
      uint carriedOut = x.Extract<uint>(3);

      x = x << 32;
      x.Insert(0, y);

      y = carriedOut;
  }
  /// <summary>
  /// Bitwise "choose": for each bit, returns the bit of <paramref name="y"/> where <paramref name="x"/>
  /// is set and the bit of <paramref name="z"/> where it is clear.
  /// </summary>
  private static uint ShaChoose(uint x, uint y, uint z) => z ^ (x & (y ^ z));
  /// <summary>
  /// Bitwise "majority": each result bit is set when at least two of the three corresponding input bits are set.
  /// </summary>
  private static uint ShaMajority(uint x, uint y, uint z) => (x & y) | (x & z) | (y & z);
  /// <summary>
  /// Bitwise "parity": the XOR of the three inputs.
  /// </summary>
  private static uint ShaParity(uint x, uint y, uint z) => (x ^ y) ^ z;
  /// <summary>
  /// Rotates a 32-bit value left by <paramref name="count"/> bits.
  /// </summary>
  private static uint Rol(this uint value, int count) => (value >> (32 - count)) | (value << count);
  381. #endregion
  382. #region "Sha256"
  /// <summary>
  /// Runs <c>Sha256Hash</c> with <c>part1: true</c>, which returns the first (x) half of the updated state.
  /// </summary>
  /// <param name="hash_abcd">First four state words.</param>
  /// <param name="hash_efgh">Second four state words.</param>
  /// <param name="wk">Four 32-bit words, one consumed per round.</param>
  /// <returns>The updated first half of the state.</returns>
  public static V128 HashLower(V128 hash_abcd, V128 hash_efgh, V128 wk) =>
      Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
  /// <summary>
  /// Runs <c>Sha256Hash</c> with <c>part1: false</c>, which returns the second (y) half of the updated state.
  /// </summary>
  /// <param name="hash_abcd">First four state words.</param>
  /// <param name="hash_efgh">Second four state words.</param>
  /// <param name="wk">Four 32-bit words, one consumed per round.</param>
  /// <returns>The updated second half of the state.</returns>
  public static V128 HashUpper(V128 hash_abcd, V128 hash_efgh, V128 wk) =>
      Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
  /// <summary>
  /// First half of the SHA-256 schedule update: for each of the four lanes, adds the current word of
  /// <paramref name="w0_3"/> to a mix (ror 7 ^ ror 18 ^ lsr 3) of the word that follows it.
  /// </summary>
  /// <param name="w0_3">Schedule words 0..3.</param>
  /// <param name="w4_7">Schedule words 4..7; only element 0 is read.</param>
  /// <returns>The four partially updated schedule words.</returns>
  public static V128 Sha256SchedulePart1(V128 w0_3, V128 w4_7)
  {
      V128 result = new V128();

      for (int lane = 0; lane < 4; lane++)
      {
          // The "next" word of the last lane lives in the following register.
          uint next = lane < 3 ? w0_3.Extract<uint>(lane + 1) : w4_7.Extract<uint>(0);
          uint mixed = next.Ror(7) ^ next.Ror(18) ^ next.Lsr(3);

          result.Insert(lane, mixed + w0_3.Extract<uint>(lane));
      }

      return result;
  }
  /// <summary>
  /// Second half of the SHA-256 schedule update. Lanes 0 and 1 are derived from the upper 64 bits of
  /// <paramref name="w12_15"/>; lanes 2 and 3 are derived from the freshly computed lanes 0 and 1.
  /// </summary>
  /// <param name="w0_3">Vector whose lane is added to each result lane.</param>
  /// <param name="w8_11">Schedule words 8..11; elements 1..3 are read.</param>
  /// <param name="w12_15">Schedule words 12..15.</param>
  /// <returns>The four new schedule words.</returns>
  public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15)
  {
      V128 result = new V128();
      ulong source = w12_15.Extract<ulong>(1);

      for (int lane = 0; lane < 4; lane++)
      {
          if (lane == 2)
          {
              // The second pair of lanes feeds on the first pair just written.
              source = result.Extract<ulong>(0);
          }

          uint word = source.ULongPart(lane & 1);
          uint mixed = word.Ror(17) ^ word.Ror(19) ^ word.Lsr(10);

          // Lanes 0..2 add w8_11[lane + 1]; lane 3 wraps into w12_15[0].
          uint older = lane < 3 ? w8_11.Extract<uint>(lane + 1) : w12_15.Extract<uint>(0);

          result.Insert(lane, mixed + w0_3.Extract<uint>(lane) + older);
      }

      return result;
  }
  /// <summary>
  /// Runs four SHA-256 style rounds over the two-register state (x, y).
  /// </summary>
  /// <param name="x">First four state words.</param>
  /// <param name="y">Second four state words.</param>
  /// <param name="w">Four 32-bit words, one consumed per round.</param>
  /// <param name="part1">True to return the updated x, false to return the updated y.</param>
  /// <returns>The selected half of the updated state.</returns>
  private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1)
  {
      int round = 0;

      while (round < 4)
      {
          uint x0 = x.Extract<uint>(0);
          uint y0 = y.Extract<uint>(0);

          uint choose = ShaChoose(y0, y.Extract<uint>(1), y.Extract<uint>(2));
          uint majority = ShaMajority(x0, x.Extract<uint>(1), x.Extract<uint>(2));

          // Term shared by both state updates of this round.
          uint common = y.Extract<uint>(3) + ShaHashSigma1(y0) + choose + w.Extract<uint>(round);

          x.Insert(3, common + x.Extract<uint>(3));
          y.Insert(3, common + ShaHashSigma0(x0) + majority);

          Rol32_256(ref y, ref x);

          round++;
      }

      if (part1)
      {
          return x;
      }

      return y;
  }
  /// <summary>
  /// Rotates the combined (x, y) state by one 32-bit word: both registers are shifted left by 32 and
  /// each receives, as element 0, the word shifted out of the other.
  /// </summary>
  /// <param name="y">Second register; updated in place.</param>
  /// <param name="x">First register; updated in place.</param>
  private static void Rol32_256(ref V128 y, ref V128 x)
  {
      uint outOfY = y.Extract<uint>(3);
      uint outOfX = x.Extract<uint>(3);

      y = y << 32;
      x = x << 32;

      y.Insert(0, outOfX);
      x.Insert(0, outOfY);
  }
  /// <summary>
  /// XOR of <paramref name="x"/> rotated right by 2, 13 and 22 bits.
  /// </summary>
  private static uint ShaHashSigma0(uint x)
  {
      uint by2 = x.Ror(2);
      uint by13 = x.Ror(13);
      uint by22 = x.Ror(22);

      return by2 ^ by13 ^ by22;
  }
  /// <summary>
  /// XOR of <paramref name="x"/> rotated right by 6, 11 and 25 bits.
  /// </summary>
  private static uint ShaHashSigma1(uint x)
  {
      uint by6 = x.Ror(6);
      uint by11 = x.Ror(11);
      uint by25 = x.Ror(25);

      return by6 ^ by11 ^ by25;
  }
  /// <summary>
  /// Rotates a 32-bit value right by <paramref name="count"/> bits.
  /// </summary>
  private static uint Ror(this uint value, int count) => (value << (32 - count)) | (value >> count);
  /// <summary>
  /// Logical (zero-filling) right shift of a 32-bit value by <paramref name="count"/> bits.
  /// </summary>
  private static uint Lsr(this uint value, int count) => value >> count;
  /// <summary>
  /// Returns one 32-bit half of a 64-bit value.
  /// </summary>
  /// <param name="value">Source value.</param>
  /// <param name="part">0 selects the low 32 bits; any other value selects the high 32 bits.</param>
  private static uint ULongPart(this ulong value, int part)
  {
      if (part == 0)
      {
          return (uint)(value & 0xFFFFFFFFUL);
      }

      return (uint)(value >> 32);
  }
  474. #endregion
  /// <summary>
  /// Carry-less (XOR-accumulating) multiplication of two 64-bit operands into a 128-bit result.
  /// </summary>
  /// <param name="op1">Operand whose set bits select which shifted copies of <paramref name="op2"/> are accumulated.</param>
  /// <param name="op2">Operand that is shifted and XOR-ed into the result.</param>
  /// <returns>The 128-bit product.</returns>
  public static V128 PolynomialMult64_128(ulong op1, ulong op2)
  {
      V128 product = V128.Zero;
      V128 multiplicand = new V128(op2, 0);

      for (int bit = 0; bit < 64; bit++)
      {
          bool selected = (op1 & (1UL << bit)) != 0UL;

          if (!selected)
          {
              continue;
          }

          product = product ^ (multiplicand << bit);
      }

      return product;
  }
  488. }
  489. }