SoftFallback.cs 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194
  1. using ChocolArm64.State;
  2. using ChocolArm64.Translation;
  3. using System;
  4. using System.Runtime.CompilerServices;
  5. using System.Runtime.Intrinsics;
  6. using System.Runtime.Intrinsics.X86;
  7. namespace ChocolArm64.Instructions
  8. {
  9. using static VectorHelper;
  10. static class SoftFallback
  11. {
  /// <summary>
  /// Asks <paramref name="context"/> to emit a call to the <see cref="SoftFallback"/> method
  /// whose name is <paramref name="mthdName"/>.
  /// </summary>
  public static void EmitCall(ILEmitterCtx context, string mthdName)
      => context.EmitCall(typeof(SoftFallback), mthdName);
  16. #region "ShlReg"
  /// <summary>
  /// Shifts a signed element by a register-supplied amount: left when the amount is positive,
  /// right (through <c>SignedShrReg</c>) when it is negative.
  /// </summary>
  /// <param name="value">Element value to shift.</param>
  /// <param name="shift">Shift operand; only its low byte is used, read as a signed number.</param>
  /// <param name="round">Forwarded to the right-shift helper when the amount is negative.</param>
  /// <param name="size">Element size selector; the element is 8 shifted left by this many bits wide.</param>
  /// <returns>The shifted value; zero when a left shift covers the whole element.</returns>
  public static long SignedShlReg(long value, long shift, bool round, int size)
  {
      int elemBits = 8 << size;
      int amount = (sbyte)shift;

      if (amount == 0)
      {
          return value;
      }

      if (amount < 0)
      {
          return SignedShrReg(value, -amount, round, elemBits);
      }

      return amount >= elemBits ? 0L : value << amount;
  }
  /// <summary>
  /// Shifts an unsigned element by a register-supplied amount: left when the amount is positive,
  /// right (through <c>UnsignedShrReg</c>) when it is negative.
  /// </summary>
  /// <param name="value">Element value to shift.</param>
  /// <param name="shift">Shift operand; only its low byte is used, read as a signed number.</param>
  /// <param name="round">Forwarded to the right-shift helper when the amount is negative.</param>
  /// <param name="size">Element size selector; the element is 8 shifted left by this many bits wide.</param>
  /// <returns>The shifted value; zero when a left shift covers the whole element.</returns>
  public static ulong UnsignedShlReg(ulong value, ulong shift, bool round, int size)
  {
      int elemBits = 8 << size;
      int amount = (sbyte)shift;

      if (amount == 0)
      {
          return value;
      }

      if (amount < 0)
      {
          return UnsignedShrReg(value, -amount, round, elemBits);
      }

      return amount >= elemBits ? 0UL : value << amount;
  }
  /// <summary>
  /// Saturating variant of the signed register shift. Negative amounts shift right, positive
  /// amounts shift left and saturate (raising the Qc flag through the helpers) when bits are lost.
  /// </summary>
  /// <param name="value">Element value to shift.</param>
  /// <param name="shift">Shift operand; only its low byte is used, read as a signed number.</param>
  /// <param name="round">Forwarded to the right-shift helper when the amount is negative.</param>
  /// <param name="size">Element size selector; the element is 8 shifted left by this many bits wide.</param>
  /// <param name="state">Thread state handed to the saturation helpers.</param>
  public static long SignedShlRegSatQ(long value, long shift, bool round, int size, CpuThreadState state)
  {
      int elemBits = 8 << size;
      int amount = (sbyte)shift;

      if (amount == 0)
      {
          return value;
      }

      if (amount < 0)
      {
          return SignedShrReg(value, -amount, round, elemBits);
      }

      if (amount >= elemBits)
      {
          // Everything is shifted out: the result depends only on the sign of the input.
          return SignedSignSatQ(value, elemBits, state);
      }

      if (elemBits != 64)
      {
          // Narrow elements: shift in 64-bit arithmetic, then clamp to the element range.
          return SignedSrcSignedDstSatQ(value << amount, size, state);
      }

      long shifted = value << amount;

      // For 64-bit elements, shifting back must reproduce the input; otherwise bits were lost.
      return (shifted >> amount) == value
          ? shifted
          : SignedSignSatQ(value, elemBits, state);
  }
  /// <summary>
  /// Saturating variant of the unsigned register shift. Negative amounts shift right, positive
  /// amounts shift left and saturate (raising the Qc flag through the helpers) when bits are lost.
  /// </summary>
  /// <param name="value">Element value to shift.</param>
  /// <param name="shift">Shift operand; only its low byte is used, read as a signed number.</param>
  /// <param name="round">Forwarded to the right-shift helper when the amount is negative.</param>
  /// <param name="size">Element size selector; the element is 8 shifted left by this many bits wide.</param>
  /// <param name="state">Thread state handed to the saturation helpers.</param>
  public static ulong UnsignedShlRegSatQ(ulong value, ulong shift, bool round, int size, CpuThreadState state)
  {
      int elemBits = 8 << size;
      int amount = (sbyte)shift;

      if (amount == 0)
      {
          return value;
      }

      if (amount < 0)
      {
          return UnsignedShrReg(value, -amount, round, elemBits);
      }

      if (amount >= elemBits)
      {
          // Everything is shifted out: any non-zero input saturates.
          return UnsignedSignSatQ(value, elemBits, state);
      }

      if (elemBits != 64)
      {
          // Narrow elements: shift in 64-bit arithmetic, then clamp to the element range.
          return UnsignedSrcUnsignedDstSatQ(value << amount, size, state);
      }

      ulong shifted = value << amount;

      // For 64-bit elements, shifting back must reproduce the input; otherwise bits were lost.
      return (shifted >> amount) == value
          ? shifted
          : UnsignedSignSatQ(value, elemBits, state);
  }
  /// <summary>
  /// Arithmetic right shift of a signed element, optionally rounding to nearest.
  /// </summary>
  /// <param name="value">Element value to shift.</param>
  /// <param name="shift">Shift amount, in [1, 128].</param>
  /// <param name="round">When true, half of the last shifted-out bit position is added before shifting.</param>
  /// <param name="eSize">Element width in bits: 8, 16, 32 or 64.</param>
  /// <returns>
  /// The shifted value. When the shift covers the whole element the result is 0 if rounding,
  /// otherwise -1 for negative inputs and 0 for the rest.
  /// </returns>
  private static long SignedShrReg(long value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}.
  {
      bool coversElement = shift >= eSize;

      if (!round)
      {
          if (coversElement)
          {
              return value < 0L ? -1L : 0L;
          }

          return value >> shift;
      }

      if (coversElement)
      {
          return 0L;
      }

      long biased = value + (1L << (shift - 1));

      // Only a 64-bit element can wrap here: a non-negative input whose biased sum turned negative.
      bool wrapped = eSize == 64 && (~value & (value ^ biased)) < 0L;

      // After a wrap the sum is really an unsigned quantity, so shift it logically.
      return wrapped
          ? (long)((ulong)biased >> shift)
          : biased >> shift;
  }
  /// <summary>
  /// Logical right shift of an unsigned element, optionally rounding to nearest.
  /// </summary>
  /// <param name="value">Element value to shift.</param>
  /// <param name="shift">Shift amount, in [1, 128].</param>
  /// <param name="round">When true, half of the last shifted-out bit position is added before shifting.</param>
  /// <param name="eSize">Element width in bits: 8, 16, 32 or 64.</param>
  /// <returns>The shifted (and possibly rounded) value.</returns>
  private static ulong UnsignedShrReg(ulong value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}.
  {
      if (!round)
      {
          return shift >= eSize ? 0UL : value >> shift;
      }

      if (shift > 64)
      {
          return 0UL;
      }

      ulong bias = 1UL << (shift - 1);
      ulong sum = value + bias;

      // A carry out of bit 63 is only tracked for 64-bit elements.
      bool carry = eSize == 64 && sum < value && sum < bias;

      if (shift == 64)
      {
          // Only the carry bit (if any) survives a shift by the full 64 bits.
          return carry ? 1UL : 0UL;
      }

      ulong shifted = sum >> shift;

      if (carry)
      {
          // Re-insert the lost carry at the position it lands on after the shift.
          shifted |= 0x8000000000000000UL >> (shift - 1);
      }

      return shifted;
  }
  /// <summary>
  /// Saturates purely on the sign of <paramref name="op"/>: positive inputs give the largest
  /// signed element value, negative inputs the smallest, and zero stays zero.
  /// The Qc flag is set whenever the input is non-zero.
  /// </summary>
  /// <param name="op">Value whose sign selects the result.</param>
  /// <param name="eSize">Element width in bits: 8, 16, 32 or 64.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  private static long SignedSignSatQ(long op, int eSize, CpuThreadState state) // eSize := {8, 16, 32, 64}.
  {
      if (op == 0L)
      {
          return 0L;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      long signBit = 1L << (eSize - 1);

      return op > 0L ? signBit - 1L : -signBit;
  }
  /// <summary>
  /// Saturates any non-zero input to the largest unsigned element value and sets the Qc flag;
  /// zero is returned unchanged and leaves the flag alone.
  /// </summary>
  /// <param name="op">Value to test against zero.</param>
  /// <param name="eSize">Element width in bits: 8, 16, 32 or 64.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  private static ulong UnsignedSignSatQ(ulong op, int eSize, CpuThreadState state) // eSize := {8, 16, 32, 64}.
  {
      if (op == 0UL)
      {
          return 0UL;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return ulong.MaxValue >> (64 - eSize);
  }
  254. #endregion
  255. #region "ShrImm64"
  /// <summary>
  /// Arithmetic right shift of a 64-bit signed value by an immediate, with optional rounding.
  /// </summary>
  /// <param name="value">Value to shift.</param>
  /// <param name="roundConst">Zero for a plain shift; otherwise the rounding constant added before shifting.</param>
  /// <param name="shift">Shift amount; values above 63 are treated as a shift by 64.</param>
  /// <returns>The shifted (and possibly rounded) value.</returns>
  public static long SignedShrImm64(long value, long roundConst, int shift)
  {
      bool fullWidth = shift > 63;

      if (roundConst == 0L)
      {
          if (fullWidth)
          {
              // Only the sign survives a shift by the full width.
              return value < 0L ? -1L : 0L;
          }

          return value >> shift;
      }

      if (fullWidth)
      {
          return 0L;
      }

      long biased = value + roundConst;

      // A non-negative input whose biased sum turned negative has wrapped.
      bool wrapped = (~value & (value ^ biased)) < 0L;

      // After a wrap the sum is really an unsigned quantity, so shift it logically.
      return wrapped
          ? (long)((ulong)biased >> shift)
          : biased >> shift;
  }
  /// <summary>
  /// Logical right shift of a 64-bit unsigned value by an immediate, with optional rounding.
  /// </summary>
  /// <param name="value">Value to shift.</param>
  /// <param name="roundConst">Zero for a plain shift; otherwise the rounding constant added before shifting.</param>
  /// <param name="shift">Shift amount; values above 63 are treated as a shift by 64.</param>
  /// <returns>The shifted (and possibly rounded) value.</returns>
  public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
  {
      bool fullWidth = shift > 63;

      if (roundConst == 0L)
      {
          return fullWidth ? 0UL : value >> shift;
      }

      ulong bias = (ulong)roundConst;
      ulong sum = value + bias;

      // The addition carried out of bit 63 when the sum is smaller than both operands.
      bool carry = sum < value && sum < bias;

      if (fullWidth)
      {
          // Only the carry bit (if any) survives a shift by the full 64 bits.
          return carry ? 1UL : 0UL;
      }

      ulong shifted = sum >> shift;

      if (carry)
      {
          // Re-insert the lost carry at the position it lands on after the shift.
          shifted |= 0x8000000000000000UL >> (shift - 1);
      }

      return shifted;
  }
  336. #endregion
  337. #region "Saturating"
  /// <summary>
  /// Clamps a signed value to the signed range of the destination element, setting the Qc flag
  /// when clamping occurs.
  /// </summary>
  /// <param name="op">Value to clamp.</param>
  /// <param name="size">Destination size selector; the element is 8 shifted left by this many bits wide.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  public static long SignedSrcSignedDstSatQ(long op, int size, CpuThreadState state)
  {
      int elemBits = 8 << size;

      long upper = (1L << (elemBits - 1)) - 1L;
      long lower = -(1L << (elemBits - 1));

      if (op >= lower && op <= upper)
      {
          return op;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return op > upper ? upper : lower;
  }
  /// <summary>
  /// Clamps a signed value to the unsigned range of the destination element, setting the Qc flag
  /// when clamping occurs.
  /// </summary>
  /// <remarks>
  /// The upper bound is built by shifting <see cref="ulong.MaxValue"/> right instead of computing
  /// <c>(1UL &lt;&lt; eSize) - 1</c>: C# only uses the low six bits of a 64-bit shift count, so the
  /// latter evaluates to 0 for a 64-bit element and would clamp every positive input to 0.
  /// </remarks>
  /// <param name="op">Value to clamp.</param>
  /// <param name="size">Destination size selector (0 to 3); the element is 8 shifted left by this many bits wide.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  /// <returns>0 for negative inputs, the element maximum for inputs above it, otherwise the input.</returns>
  public static ulong SignedSrcUnsignedDstSatQ(long op, int size, CpuThreadState state)
  {
      int eSize = 8 << size;

      ulong tMaxValue = ulong.MaxValue >> (64 - eSize);

      if (op < 0L)
      {
          state.SetFpsrFlag(Fpsr.Qc);

          return 0UL;
      }

      // Non-negative from here on, so the comparison can be done in the unsigned domain.
      ulong unsignedOp = (ulong)op;

      if (unsignedOp > tMaxValue)
      {
          state.SetFpsrFlag(Fpsr.Qc);

          return tMaxValue;
      }

      return unsignedOp;
  }
  /// <summary>
  /// Clamps an unsigned value to the largest signed value of the destination element, setting
  /// the Qc flag when clamping occurs.
  /// </summary>
  /// <param name="op">Value to clamp.</param>
  /// <param name="size">Destination size selector; the element is 8 shifted left by this many bits wide.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  public static long UnsignedSrcSignedDstSatQ(ulong op, int size, CpuThreadState state)
  {
      int elemBits = 8 << size;
      long upper = (1L << (elemBits - 1)) - 1L;

      if (op <= (ulong)upper)
      {
          return (long)op;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return upper;
  }
  /// <summary>
  /// Clamps an unsigned value to the unsigned range of the destination element, setting the Qc
  /// flag when clamping occurs.
  /// </summary>
  /// <remarks>
  /// The upper bound is built by shifting <see cref="ulong.MaxValue"/> right (the same form
  /// <c>UnsignedSignSatQ</c> uses) instead of computing <c>(1UL &lt;&lt; eSize) - 1</c>: C# only uses
  /// the low six bits of a 64-bit shift count, so the latter evaluates to 0 for a 64-bit element
  /// and would clamp every non-zero input to 0.
  /// </remarks>
  /// <param name="op">Value to clamp.</param>
  /// <param name="size">Destination size selector (0 to 3); the element is 8 shifted left by this many bits wide.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  /// <returns>The element maximum for inputs above it, otherwise the input.</returns>
  public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size, CpuThreadState state)
  {
      int eSize = 8 << size;

      ulong tMaxValue = ulong.MaxValue >> (64 - eSize);

      if (op > tMaxValue)
      {
          state.SetFpsrFlag(Fpsr.Qc);

          return tMaxValue;
      }

      return op;
  }
  /// <summary>
  /// Replaces <see cref="long.MinValue"/> with <see cref="long.MaxValue"/> and sets the Qc flag;
  /// every other input is returned unchanged.
  /// </summary>
  /// <param name="op">Value to check.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  public static long UnarySignedSatQAbsOrNeg(long op, CpuThreadState state)
  {
      if (op != long.MinValue)
      {
          return op;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return long.MaxValue;
  }
  /// <summary>
  /// Adds two signed 64-bit values, saturating to <see cref="long.MinValue"/> or
  /// <see cref="long.MaxValue"/> and setting the Qc flag on overflow.
  /// </summary>
  /// <param name="op1">First addend; its sign selects the saturation direction.</param>
  /// <param name="op2">Second addend.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  public static long BinarySignedSatQAdd(long op1, long op2, CpuThreadState state)
  {
      long sum = op1 + op2;

      // Overflow: both operands share a sign and the sum's sign differs from it.
      bool overflowed = (~(op1 ^ op2) & (op1 ^ sum)) < 0L;

      if (!overflowed)
      {
          return sum;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return op1 < 0L ? long.MinValue : long.MaxValue;
  }
  /// <summary>
  /// Adds two unsigned 64-bit values, saturating to <see cref="ulong.MaxValue"/> and setting the
  /// Qc flag when the sum wraps.
  /// </summary>
  /// <param name="op1">First addend.</param>
  /// <param name="op2">Second addend.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2, CpuThreadState state)
  {
      ulong sum = op1 + op2;

      // No wrap as long as the sum is not smaller than both operands.
      if (sum >= op1 || sum >= op2)
      {
          return sum;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return ulong.MaxValue;
  }
  /// <summary>
  /// Subtracts <paramref name="op2"/> from <paramref name="op1"/>, saturating to
  /// <see cref="long.MinValue"/> or <see cref="long.MaxValue"/> and setting the Qc flag on overflow.
  /// </summary>
  /// <param name="op1">Minuend; its sign selects the saturation direction.</param>
  /// <param name="op2">Subtrahend.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  public static long BinarySignedSatQSub(long op1, long op2, CpuThreadState state)
  {
      long difference = op1 - op2;

      // Overflow: the operands differ in sign and the result's sign differs from op1's.
      bool overflowed = ((op1 ^ op2) & (op1 ^ difference)) < 0L;

      if (!overflowed)
      {
          return difference;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return op1 < 0L ? long.MinValue : long.MaxValue;
  }
  /// <summary>
  /// Subtracts <paramref name="op2"/> from <paramref name="op1"/>, saturating to
  /// <see cref="ulong.MinValue"/> and setting the Qc flag when the result would be negative.
  /// </summary>
  /// <param name="op1">Minuend.</param>
  /// <param name="op2">Subtrahend.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2, CpuThreadState state)
  {
      if (op1 >= op2)
      {
          return op1 - op2;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return ulong.MinValue;
  }
  /// <summary>
  /// Adds an unsigned value to a signed one and returns the sum as a signed 64-bit value,
  /// saturating to <see cref="long.MaxValue"/> and setting the Qc flag when it does not fit.
  /// </summary>
  /// <param name="op1">Unsigned addend.</param>
  /// <param name="op2">Signed addend.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  public static long BinarySignedSatQAcc(ulong op1, long op2, CpuThreadState state)
  {
      if (op1 <= (ulong)long.MaxValue)
      {
          // op1 fits in a long, so a plain signed addition works; with op1 non-negative it can
          // only overflow when op2 is non-negative and the sum turns negative.
          long signedSum = (long)op1 + op2;

          if ((~op2 & signedSum) >= 0L)
          {
              return signedSum;
          }
      }
      else if (op2 < 0L)
      {
          // op1 is above long.MaxValue and op2 is negative: the wrapped unsigned addition
          // yields the exact sum, which may or may not fit in a long.
          ulong unsignedSum = op1 + (ulong)op2;

          if (unsignedSum <= (ulong)long.MaxValue)
          {
              return (long)unsignedSum;
          }
      }

      // Remaining cases (including op1 above long.MaxValue with op2 non-negative) saturate.
      state.SetFpsrFlag(Fpsr.Qc);

      return long.MaxValue;
  }
  /// <summary>
  /// Adds a signed value to an unsigned one and returns the sum as an unsigned 64-bit value,
  /// saturating to <see cref="ulong.MaxValue"/> or <see cref="ulong.MinValue"/> and setting the
  /// Qc flag when it does not fit.
  /// </summary>
  /// <param name="op1">Signed addend.</param>
  /// <param name="op2">Unsigned addend.</param>
  /// <param name="state">Thread state that receives the Qc flag.</param>
  public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2, CpuThreadState state)
  {
      if (op1 >= 0L)
      {
          // Both operands are non-negative: ordinary unsigned addition with wrap detection.
          ulong lhs = (ulong)op1;
          ulong sum = lhs + op2;

          if (sum >= lhs || sum >= op2)
          {
              return sum;
          }

          state.SetFpsrFlag(Fpsr.Qc);

          return ulong.MaxValue;
      }

      if (op2 > (ulong)long.MaxValue)
      {
          // Negative op1 with op2 above long.MaxValue: the wrapped addition is the exact sum.
          return (ulong)op1 + op2;
      }

      // Negative op1 with op2 that fits in a long: add as signed and clamp negatives to zero.
      long signedSum = op1 + (long)op2;

      if (signedSum >= 0L)
      {
          return (ulong)signedSum;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return ulong.MinValue;
  }
  563. #endregion
  564. #region "Count"
  /// <summary>
  /// Counts how many bits directly below the top bit of a <paramref name="size"/>-bit value are
  /// equal to that top bit.
  /// </summary>
  /// <param name="value">Value to inspect.</param>
  /// <param name="size">Width in bits: 8, 16, 32 or 64.</param>
  /// <returns>A count between 0 and <c>size - 1</c>.</returns>
  public static ulong CountLeadingSigns(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
  {
      // A set bit marks a position whose value differs from the bit above it.
      ulong transitions = value ^ (value >> 1);

      int top = size - 2;
      int bit = top;

      while (bit >= 0 && ((transitions >> bit) & 1UL) == 0UL)
      {
          bit--;
      }

      return bit >= 0 ? (ulong)(top - bit) : (ulong)(size - 1);
  }
  // Number of leading zero bits in each possible 4-bit value (index 0 means all four are zero).
  private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };

  /// <summary>
  /// Counts the leading zero bits of a <paramref name="size"/>-bit value, scanning one nibble at
  /// a time from the top with <c>ClzNibbleTbl</c>.
  /// </summary>
  /// <param name="value">Value to inspect.</param>
  /// <param name="size">Width in bits: 8, 16, 32 or 64.</param>
  /// <returns><paramref name="size"/> when the value is zero, otherwise the leading zero count.</returns>
  public static ulong CountLeadingZeros(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
  {
      if (value == 0ul)
      {
          return (ulong)size;
      }

      int total = 0;

      for (int nibbleShift = size - 4; ; nibbleShift -= 4)
      {
          int zeros = ClzNibbleTbl[(int)(value >> nibbleShift) & 0b1111];

          total += zeros;

          // A nibble that is not entirely zero ends the scan.
          if (zeros != 4)
          {
              return (ulong)total;
          }
      }
  }
  /// <summary>
  /// Counts the set bits of an 8-bit value by summing adjacent groups of 1, 2 and then 4 bits.
  /// </summary>
  /// <param name="value">Value whose low byte is counted.</param>
  public static ulong CountSetBits8(ulong value) // "size" is 8 (SIMD&FP Inst.).
  {
      ulong pairSums = (value & 0x55ul) + ((value >> 1) & 0x55ul);
      ulong nibbleSums = (pairSums & 0x33ul) + ((pairSums >> 2) & 0x33ul);

      return (nibbleSums & 0x0ful) + (nibbleSums >> 4);
  }
  602. #endregion
  603. #region "Crc32"
  // Polynomials passed to the shift-right update loop in Crc32 below.
  private const uint Crc32RevPoly = 0xedb88320;
  private const uint Crc32CRevPoly = 0x82f63b78;

  /// <summary>Updates <paramref name="crc"/> with one byte using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32B(uint crc, byte val)
  {
      return Crc32(crc, Crc32RevPoly, val);
  }

  /// <summary>Updates <paramref name="crc"/> with two bytes, low byte first, using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32H(uint crc, ushort val)
  {
      return Crc32H(crc, Crc32RevPoly, val);
  }

  /// <summary>Updates <paramref name="crc"/> with four bytes, low byte first, using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32W(uint crc, uint val)
  {
      return Crc32W(crc, Crc32RevPoly, val);
  }

  /// <summary>Updates <paramref name="crc"/> with eight bytes, low byte first, using <c>Crc32RevPoly</c>.</summary>
  public static uint Crc32X(uint crc, ulong val)
  {
      return Crc32X(crc, Crc32RevPoly, val);
  }

  /// <summary>Updates <paramref name="crc"/> with one byte using <c>Crc32CRevPoly</c>.</summary>
  public static uint Crc32Cb(uint crc, byte val)
  {
      return Crc32(crc, Crc32CRevPoly, val);
  }

  /// <summary>Updates <paramref name="crc"/> with two bytes, low byte first, using <c>Crc32CRevPoly</c>.</summary>
  public static uint Crc32Ch(uint crc, ushort val)
  {
      return Crc32H(crc, Crc32CRevPoly, val);
  }

  /// <summary>Updates <paramref name="crc"/> with four bytes, low byte first, using <c>Crc32CRevPoly</c>.</summary>
  public static uint Crc32Cw(uint crc, uint val)
  {
      return Crc32W(crc, Crc32CRevPoly, val);
  }

  /// <summary>Updates <paramref name="crc"/> with eight bytes, low byte first, using <c>Crc32CRevPoly</c>.</summary>
  public static uint Crc32Cx(uint crc, ulong val)
  {
      return Crc32X(crc, Crc32CRevPoly, val);
  }

  // Feeds the two bytes of val into the CRC, least significant byte first.
  private static uint Crc32H(uint crc, uint poly, ushort val)
  {
      for (int shift = 0; shift < 16; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  // Feeds the four bytes of val into the CRC, least significant byte first.
  private static uint Crc32W(uint crc, uint poly, uint val)
  {
      for (int shift = 0; shift < 32; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  // Feeds the eight bytes of val into the CRC, least significant byte first.
  private static uint Crc32X(uint crc, uint poly, ulong val)
  {
      for (int shift = 0; shift < 64; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  // Core update: XOR the byte into the low bits, then run eight shift-right steps, XOR-ing in
  // the polynomial whenever the bit shifted out was set.
  private static uint Crc32(uint crc, uint poly, byte val)
  {
      uint acc = crc ^ val;

      for (int step = 0; step < 8; step++)
      {
          bool lowBitSet = (acc & 1) != 0;

          acc >>= 1;

          if (lowBitSet)
          {
              acc ^= poly;
          }
      }

      return acc;
  }
  650. #endregion
  651. #region "Aes"
  /// <summary>
  /// XORs <paramref name="value"/> with <paramref name="roundKey"/>, then applies
  /// <c>CryptoHelper.AesInvShiftRows</c> followed by <c>CryptoHelper.AesInvSubBytes</c>.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> Decrypt(Vector128<float> value, Vector128<float> roundKey)
  {
      if (Sse.IsSupported)
      {
          var keyed = Sse.Xor(value, roundKey);
          var rowsShifted = CryptoHelper.AesInvShiftRows(keyed);

          return CryptoHelper.AesInvSubBytes(rowsShifted);
      }

      throw new PlatformNotSupportedException();
  }
  /// <summary>
  /// XORs <paramref name="value"/> with <paramref name="roundKey"/>, then applies
  /// <c>CryptoHelper.AesShiftRows</c> followed by <c>CryptoHelper.AesSubBytes</c>.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> Encrypt(Vector128<float> value, Vector128<float> roundKey)
  {
      if (Sse.IsSupported)
      {
          var keyed = Sse.Xor(value, roundKey);
          var rowsShifted = CryptoHelper.AesShiftRows(keyed);

          return CryptoHelper.AesSubBytes(rowsShifted);
      }

      throw new PlatformNotSupportedException();
  }
  /// <summary>Forwards <paramref name="value"/> to <c>CryptoHelper.AesInvMixColumns</c>.</summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> InverseMixColumns(Vector128<float> value)
      => CryptoHelper.AesInvMixColumns(value);
  /// <summary>Forwards <paramref name="value"/> to <c>CryptoHelper.AesMixColumns</c>.</summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> MixColumns(Vector128<float> value)
      => CryptoHelper.AesMixColumns(value);
  680. #endregion
  681. #region "Sha1"
  /// <summary>
  /// Runs four hash rounds over the state held in <paramref name="hash_abcd"/> and
  /// <paramref name="hash_e"/>, mixing lanes 1 to 3 with <c>ShaChoose</c> in every round.
  /// </summary>
  /// <param name="hash_abcd">Four 32-bit state words.</param>
  /// <param name="hash_e">Fifth 32-bit state word.</param>
  /// <param name="wk">Four 32-bit words, one consumed per round.</param>
  /// <returns>The four-word state after the last round.</returns>
  public static Vector128<float> HashChoose(Vector128<float> hash_abcd, uint hash_e, Vector128<float> wk)
  {
      for (int round = 0; round < 4; round++)
      {
          uint lane0 = (uint)VectorExtractIntZx(hash_abcd, (byte)0, 2);
          uint lane1 = (uint)VectorExtractIntZx(hash_abcd, (byte)1, 2);
          uint lane2 = (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2);
          uint lane3 = (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2);

          uint mixed = ShaChoose(lane1, lane2, lane3);

          hash_e += Rol(lane0, 5) + mixed;
          hash_e += (uint)VectorExtractIntZx(wk, (byte)round, 2);

          hash_abcd = VectorInsertInt((ulong)Rol(lane1, 30), hash_abcd, (byte)1, 2);

          // Rotate the five-word state by one word.
          Rol32_160(ref hash_e, ref hash_abcd);
      }

      return hash_abcd;
  }
  /// <summary>Rotates <paramref name="hash_e"/> left by 30 bits.</summary>
  public static uint FixedRotate(uint hash_e) => Rol(hash_e, 30);
  /// <summary>
  /// Runs four hash rounds over the state held in <paramref name="hash_abcd"/> and
  /// <paramref name="hash_e"/>, mixing lanes 1 to 3 with <c>ShaMajority</c> in every round.
  /// </summary>
  /// <param name="hash_abcd">Four 32-bit state words.</param>
  /// <param name="hash_e">Fifth 32-bit state word.</param>
  /// <param name="wk">Four 32-bit words, one consumed per round.</param>
  /// <returns>The four-word state after the last round.</returns>
  public static Vector128<float> HashMajority(Vector128<float> hash_abcd, uint hash_e, Vector128<float> wk)
  {
      for (int round = 0; round < 4; round++)
      {
          uint lane0 = (uint)VectorExtractIntZx(hash_abcd, (byte)0, 2);
          uint lane1 = (uint)VectorExtractIntZx(hash_abcd, (byte)1, 2);
          uint lane2 = (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2);
          uint lane3 = (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2);

          uint mixed = ShaMajority(lane1, lane2, lane3);

          hash_e += Rol(lane0, 5) + mixed;
          hash_e += (uint)VectorExtractIntZx(wk, (byte)round, 2);

          hash_abcd = VectorInsertInt((ulong)Rol(lane1, 30), hash_abcd, (byte)1, 2);

          // Rotate the five-word state by one word.
          Rol32_160(ref hash_e, ref hash_abcd);
      }

      return hash_abcd;
  }
  /// <summary>
  /// Runs four hash rounds over the state held in <paramref name="hash_abcd"/> and
  /// <paramref name="hash_e"/>, mixing lanes 1 to 3 with <c>ShaParity</c> in every round.
  /// </summary>
  /// <param name="hash_abcd">Four 32-bit state words.</param>
  /// <param name="hash_e">Fifth 32-bit state word.</param>
  /// <param name="wk">Four 32-bit words, one consumed per round.</param>
  /// <returns>The four-word state after the last round.</returns>
  public static Vector128<float> HashParity(Vector128<float> hash_abcd, uint hash_e, Vector128<float> wk)
  {
      for (int round = 0; round < 4; round++)
      {
          uint lane0 = (uint)VectorExtractIntZx(hash_abcd, (byte)0, 2);
          uint lane1 = (uint)VectorExtractIntZx(hash_abcd, (byte)1, 2);
          uint lane2 = (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2);
          uint lane3 = (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2);

          uint mixed = ShaParity(lane1, lane2, lane3);

          hash_e += Rol(lane0, 5) + mixed;
          hash_e += (uint)VectorExtractIntZx(wk, (byte)round, 2);

          hash_abcd = VectorInsertInt((ulong)Rol(lane1, 30), hash_abcd, (byte)1, 2);

          // Rotate the five-word state by one word.
          Rol32_160(ref hash_e, ref hash_abcd);
      }

      return hash_abcd;
  }
  /// <summary>
  /// Builds a vector from the upper 64 bits of <paramref name="w0_3"/> (placed low) and the lower
  /// 64 bits of <paramref name="w4_7"/> (placed high), then XORs it with
  /// <paramref name="w0_3"/> and <paramref name="w8_11"/>.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
  public static Vector128<float> Sha1SchedulePart1(Vector128<float> w0_3, Vector128<float> w4_7, Vector128<float> w8_11)
  {
      if (!Sse.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      ulong highHalfOfW0 = VectorExtractIntZx(w0_3, (byte)1, 3);
      ulong lowHalfOfW4 = VectorExtractIntZx(w4_7, (byte)0, 3);

      Vector128<float> spliced = new Vector128<float>();

      spliced = VectorInsertInt((ulong)highHalfOfW0, spliced, (byte)0, 3);
      spliced = VectorInsertInt((ulong)lowHalfOfW4, spliced, (byte)1, 3);

      return Sse.Xor(spliced, Sse.Xor(w0_3, w8_11));
  }
  /// <summary>
  /// XORs <paramref name="tw0_3"/> with <paramref name="w12_15"/> shifted right by four bytes,
  /// rotates every 32-bit lane left by one, and additionally XORs lane 3 with lane 0 rotated
  /// left by two.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE2 is not available.</exception>
  public static Vector128<float> Sha1SchedulePart2(Vector128<float> tw0_3, Vector128<float> w12_15)
  {
      if (!Sse2.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      Vector128<float> shiftedW = Sse.StaticCast<uint, float>(
          Sse2.ShiftRightLogical128BitLane(Sse.StaticCast<float, uint>(w12_15), (byte)4));

      Vector128<float> t = Sse.Xor(tw0_3, shiftedW);

      uint firstLane = (uint)VectorExtractIntZx(t, (byte)0, 2);
      uint lastLane = (uint)VectorExtractIntZx(t, (byte)3, 2);

      Vector128<float> result = new Vector128<float>();

      // Lanes 0 to 2 are a plain rotate-left by one.
      for (int lane = 0; lane < 3; lane++)
      {
          uint word = (uint)VectorExtractIntZx(t, (byte)lane, 2);

          result = VectorInsertInt((ulong)word.Rol(1), result, (byte)lane, 2);
      }

      // Lane 3 additionally folds in lane 0 rotated by two.
      return VectorInsertInt((ulong)(lastLane.Rol(1) ^ firstLane.Rol(2)), result, (byte)3, 2);
  }
  /// <summary>
  /// Rotates the 160-bit quantity formed by <paramref name="y"/> (top word) and
  /// <paramref name="x"/> left by 32 bits: lane 3 of x moves into y and the old y becomes lane 0.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE2 is not available.</exception>
  private static void Rol32_160(ref uint y, ref Vector128<float> x)
  {
      if (Sse2.IsSupported)
      {
          uint outgoingTop = (uint)VectorExtractIntZx(x, (byte)3, 2);

          // Move every lane up by one word (four bytes); lane 0 becomes zero.
          var lanes = Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(x), (byte)4);

          x = Sse.StaticCast<uint, float>(lanes);
          x = VectorInsertInt((ulong)y, x, (byte)0, 2);
          y = outgoingTop;

          return;
      }

      throw new PlatformNotSupportedException();
  }
  /// <summary>
  /// Bitwise select: each result bit comes from <paramref name="y"/> where <paramref name="x"/>
  /// is set and from <paramref name="z"/> where it is clear.
  /// </summary>
  private static uint ShaChoose(uint x, uint y, uint z)
  {
      uint differing = y ^ z;

      return (differing & x) ^ z;
  }
  /// <summary>
  /// Bitwise majority: each result bit is set when at least two of the three inputs have it set.
  /// </summary>
  private static uint ShaMajority(uint x, uint y, uint z)
  {
      uint bothXy = x & y;
      uint eitherXy = x | y;

      return bothXy | (eitherXy & z);
  }
  /// <summary>Bitwise XOR of the three inputs.</summary>
  private static uint ShaParity(uint x, uint y, uint z) => x ^ y ^ z;
  /// <summary>Rotates a 32-bit value left by <paramref name="count"/> bits.</summary>
  private static uint Rol(this uint value, int count)
  {
      uint movedUp = value << count;
      uint wrappedAround = value >> (32 - count);

      return movedUp | wrappedAround;
  }
  789. #endregion
  790. #region "Sha256"
  /// <summary>
  /// Runs <c>Sha256Hash</c> on the two state halves and returns the half selected by
  /// <c>part1 = true</c>.
  /// </summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> HashLower(Vector128<float> hash_abcd, Vector128<float> hash_efgh, Vector128<float> wk)
      => Sha256Hash(hash_abcd, hash_efgh, wk, true);
  /// <summary>
  /// Runs <c>Sha256Hash</c> on the two state halves and returns the half selected by
  /// <c>part1 = false</c>. Note that this method receives <paramref name="hash_efgh"/> first.
  /// </summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> HashUpper(Vector128<float> hash_efgh, Vector128<float> hash_abcd, Vector128<float> wk)
      => Sha256Hash(hash_abcd, hash_efgh, wk, false);
  /// <summary>
  /// For each of the four 32-bit lanes, takes the following word (lanes 1 to 3 of
  /// <paramref name="w0_3"/>, then lane 0 of <paramref name="w4_7"/>), applies
  /// <c>Ror(7) ^ Ror(18) ^ Lsr(3)</c> to it and adds the matching lane of <paramref name="w0_3"/>.
  /// </summary>
  public static Vector128<float> Sha256SchedulePart1(Vector128<float> w0_3, Vector128<float> w4_7)
  {
      Vector128<float> result = new Vector128<float>();

      for (int lane = 0; lane < 4; lane++)
      {
          bool nextIsInW0 = lane <= 2;

          uint next = (uint)VectorExtractIntZx(
              nextIsInW0 ? w0_3 : w4_7,
              (byte)(nextIsInW0 ? lane + 1 : 0),
              2);

          uint sigma = next.Ror(7) ^ next.Ror(18) ^ next.Lsr(3);

          sigma += (uint)VectorExtractIntZx(w0_3, (byte)lane, 2);

          result = VectorInsertInt((ulong)sigma, result, (byte)lane, 2);
      }

      return result;
  }
  /// <summary>
  /// Produces four 32-bit words. Lanes 0 and 1 are derived from the upper 64 bits of
  /// <paramref name="w12_15"/>; lanes 2 and 3 are derived from the two lanes just produced.
  /// Each source word goes through <c>Ror(17) ^ Ror(19) ^ Lsr(10)</c> and is then summed with
  /// the matching lane of <paramref name="w0_3"/> and one word taken from
  /// <paramref name="w8_11"/> (lanes 1 to 3) or <paramref name="w12_15"/> (lane 0).
  /// </summary>
  public static Vector128<float> Sha256SchedulePart2(Vector128<float> w0_3, Vector128<float> w8_11, Vector128<float> w12_15)
  {
      Vector128<float> result = new Vector128<float>();

      ulong sourcePair = VectorExtractIntZx(w12_15, (byte)1, 3);

      for (int lane = 0; lane < 4; lane++)
      {
          if (lane == 2)
          {
              // The second half feeds on the two words produced by the first half.
              sourcePair = VectorExtractIntZx(result, (byte)0, 3);
          }

          uint word = sourcePair.ULongPart(lane & 1);

          word = word.Ror(17) ^ word.Ror(19) ^ word.Lsr(10);
          word += (uint)VectorExtractIntZx(w0_3, (byte)lane, 2);

          // Lanes 0 to 2 read w8_11[lane + 1]; lane 3 reads w12_15[0].
          word += lane < 3
              ? (uint)VectorExtractIntZx(w8_11, (byte)(lane + 1), 2)
              : (uint)VectorExtractIntZx(w12_15, (byte)0, 2);

          result = VectorInsertInt((ulong)word, result, (byte)lane, 2);
      }

      return result;
  }
  /// <summary>
  /// Runs four rounds over the two four-word state vectors <paramref name="x"/> and
  /// <paramref name="y"/>, consuming one 32-bit word of <paramref name="w"/> per round.
  /// </summary>
  /// <param name="x">First state vector.</param>
  /// <param name="y">Second state vector.</param>
  /// <param name="w">Four 32-bit words, one per round.</param>
  /// <param name="part1">Selects the returned vector: <paramref name="x"/> when true, <paramref name="y"/> otherwise.</param>
  private static Vector128<float> Sha256Hash(Vector128<float> x, Vector128<float> y, Vector128<float> w, bool part1)
  {
      for (int round = 0; round < 4; round++)
      {
          uint y0 = (uint)VectorExtractIntZx(y, (byte)0, 2);
          uint y1 = (uint)VectorExtractIntZx(y, (byte)1, 2);
          uint y2 = (uint)VectorExtractIntZx(y, (byte)2, 2);
          uint choose = ShaChoose(y0, y1, y2);

          uint x0 = (uint)VectorExtractIntZx(x, (byte)0, 2);
          uint x1 = (uint)VectorExtractIntZx(x, (byte)1, 2);
          uint x2 = (uint)VectorExtractIntZx(x, (byte)2, 2);
          uint majority = ShaMajority(x0, x1, x2);

          uint accumulated = (uint)VectorExtractIntZx(y, (byte)3, 2);

          accumulated += ShaHashSigma1((uint)VectorExtractIntZx(y, (byte)0, 2)) + choose;
          accumulated += (uint)VectorExtractIntZx(w, (byte)round, 2);

          uint newX3 = accumulated + (uint)VectorExtractIntZx(x, (byte)3, 2);

          x = VectorInsertInt((ulong)newX3, x, (byte)3, 2);

          uint newY3 = accumulated + ShaHashSigma0((uint)VectorExtractIntZx(x, (byte)0, 2)) + majority;

          y = VectorInsertInt((ulong)newY3, y, (byte)3, 2);

          // Rotate the eight-word state by one word.
          Rol32_256(ref y, ref x);
      }

      if (part1)
      {
          return x;
      }

      return y;
  }
  /// <summary>
  /// Rotates the 256-bit quantity formed by <paramref name="y"/> (upper half) and
  /// <paramref name="x"/> (lower half) left by 32 bits: each vector moves up one lane, lane 3 of
  /// x enters lane 0 of y, and lane 3 of y enters lane 0 of x.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE2 is not available.</exception>
  private static void Rol32_256(ref Vector128<float> y, ref Vector128<float> x)
  {
      if (Sse2.IsSupported)
      {
          uint topOfY = (uint)VectorExtractIntZx(y, (byte)3, 2);
          uint topOfX = (uint)VectorExtractIntZx(x, (byte)3, 2);

          // Move every lane up by one word (four bytes); lane 0 becomes zero.
          y = Sse.StaticCast<uint, float>(Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(y), (byte)4));
          x = Sse.StaticCast<uint, float>(Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(x), (byte)4));

          y = VectorInsertInt((ulong)topOfX, y, (byte)0, 2);
          x = VectorInsertInt((ulong)topOfY, x, (byte)0, 2);

          return;
      }

      throw new PlatformNotSupportedException();
  }
  /// <summary>XOR of <paramref name="x"/> rotated right by 2, 13 and 22 bits.</summary>
  private static uint ShaHashSigma0(uint x)
  {
      uint ror2 = (x >> 2) | (x << (32 - 2));
      uint ror13 = (x >> 13) | (x << (32 - 13));
      uint ror22 = (x >> 22) | (x << (32 - 22));

      return ror2 ^ ror13 ^ ror22;
  }
  /// <summary>XOR of <paramref name="x"/> rotated right by 6, 11 and 25 bits.</summary>
  private static uint ShaHashSigma1(uint x)
  {
      uint ror6 = (x >> 6) | (x << (32 - 6));
      uint ror11 = (x >> 11) | (x << (32 - 11));
      uint ror25 = (x >> 25) | (x << (32 - 25));

      return ror6 ^ ror11 ^ ror25;
  }
  /// <summary>Rotates a 32-bit value right by <paramref name="count"/> bits.</summary>
  private static uint Ror(this uint value, int count)
  {
      uint movedDown = value >> count;
      uint wrappedAround = value << (32 - count);

      return movedDown | wrappedAround;
  }
  /// <summary>Logical right shift of a 32-bit value by <paramref name="count"/> bits.</summary>
  private static uint Lsr(this uint value, int count) => value >> count;
  /// <summary>
  /// Returns one 32-bit half of <paramref name="value"/>: the low half when
  /// <paramref name="part"/> is 0, the high half for any other value.
  /// </summary>
  private static uint ULongPart(this ulong value, int part)
  {
      if (part != 0)
      {
          return (uint)(value >> 32);
      }

      return (uint)(value & 0xFFFFFFFFUL);
  }
  892. #endregion
  893. #region "Reverse"
  /// <summary>
  /// Reverses the bit order of the low byte of <paramref name="value"/> by swapping adjacent
  /// bits, then bit pairs, then nibbles. Bits above the low byte do not reach the result.
  /// </summary>
  public static uint ReverseBits8(uint value)
  {
      uint bitsSwapped = ((value & 0x55) << 1) | ((value & 0xaa) >> 1);
      uint pairsSwapped = ((bitsSwapped & 0x33) << 2) | ((bitsSwapped & 0xcc) >> 2);

      return ((pairsSwapped & 0x0f) << 4) | (pairsSwapped >> 4);
  }
  /// <summary>
  /// Reverses the bit order of a 32-bit value by swapping progressively larger groups:
  /// bits, pairs, nibbles, bytes and finally the two half-words.
  /// </summary>
  public static uint ReverseBits32(uint value)
  {
      uint v = value;

      v = ((v & 0x55555555) << 1) | ((v & 0xaaaaaaaa) >> 1);
      v = ((v & 0x33333333) << 2) | ((v & 0xcccccccc) >> 2);
      v = ((v & 0x0f0f0f0f) << 4) | ((v & 0xf0f0f0f0) >> 4);
      v = ((v & 0x00ff00ff) << 8) | ((v & 0xff00ff00) >> 8);

      return (v << 16) | (v >> 16);
  }
  /// <summary>
  /// Reverses the bit order of a 64-bit value by swapping progressively larger groups:
  /// bits, pairs, nibbles, bytes, half-words and finally the two words.
  /// </summary>
  public static ulong ReverseBits64(ulong value)
  {
      ulong v = value;

      v = ((v & 0x5555555555555555) << 1) | ((v & 0xaaaaaaaaaaaaaaaa) >> 1);
      v = ((v & 0x3333333333333333) << 2) | ((v & 0xcccccccccccccccc) >> 2);
      v = ((v & 0x0f0f0f0f0f0f0f0f) << 4) | ((v & 0xf0f0f0f0f0f0f0f0) >> 4);
      v = ((v & 0x00ff00ff00ff00ff) << 8) | ((v & 0xff00ff00ff00ff00) >> 8);
      v = ((v & 0x0000ffff0000ffff) << 16) | ((v & 0xffff0000ffff0000) >> 16);

      return (v << 32) | (v >> 32);
  }
  /// <summary>Swaps the two bytes of each 16-bit group of a 32-bit value.</summary>
  public static uint ReverseBytes16_32(uint value) => (uint)ReverseBytes16_64(value);

  /// <summary>Reverses the byte order of a 32-bit value.</summary>
  public static uint ReverseBytes32_32(uint value) => (uint)ReverseBytes32_64(value);

  /// <summary>Swaps the two bytes of each 16-bit group of a 64-bit value.</summary>
  public static ulong ReverseBytes16_64(ulong value) => ReverseBytes(value, RevSize.Rev16);

  /// <summary>Reverses the byte order of each 32-bit group of a 64-bit value.</summary>
  public static ulong ReverseBytes32_64(ulong value) => ReverseBytes(value, RevSize.Rev32);

  /// <summary>Reverses the byte order of a 64-bit value.</summary>
  public static ulong ReverseBytes64(ulong value) => ReverseBytes(value, RevSize.Rev64);

  // Width of the group whose bytes get reversed by ReverseBytes.
  private enum RevSize
  {
      Rev16,
      Rev32,
      Rev64
  }

  /// <summary>
  /// Reverses the bytes inside every group of the width selected by <paramref name="size"/>.
  /// The swaps are cumulative: bytes within half-words first, then half-words within words,
  /// then the two words.
  /// </summary>
  /// <param name="value">Value to transform.</param>
  /// <param name="size">Group width.</param>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not a defined <c>RevSize</c> member.</exception>
  private static ulong ReverseBytes(ulong value, RevSize size)
  {
      // Reject unknown widths before doing any work, and report the parameter name as a
      // parameter name (not as the exception message).
      if (size != RevSize.Rev16 && size != RevSize.Rev32 && size != RevSize.Rev64)
      {
          throw new ArgumentOutOfRangeException(nameof(size), size, "Unsupported byte-reversal width.");
      }

      value = ((value & 0xff00ff00ff00ff00) >> 8) | ((value & 0x00ff00ff00ff00ff) << 8);

      if (size == RevSize.Rev16)
      {
          return value;
      }

      value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16);

      if (size == RevSize.Rev32)
      {
          return value;
      }

      return ((value & 0xffffffff00000000) >> 32) | ((value & 0x00000000ffffffff) << 32);
  }
  947. #endregion
  948. #region "MultiplyHigh"
  /// <summary>
  /// Returns the upper 64 bits of the signed 128-bit product of <paramref name="left"/> and
  /// <paramref name="right"/>, derived from the unsigned high product.
  /// </summary>
  public static long SMulHi128(long left, long right)
  {
      long unsignedHigh = (long)UMulHi128((ulong)left, (ulong)right);

      // Each negative operand makes the unsigned product too large by the other operand
      // (in the upper half), so subtract those contributions.
      long correction = (left < 0 ? right : 0L) + (right < 0 ? left : 0L);

      return unsignedHigh - correction;
  }
  /// <summary>
  /// Returns the upper 64 bits of the unsigned 128-bit product of <paramref name="left"/> and
  /// <paramref name="right"/>, computed from four 32-by-32-bit partial products.
  /// </summary>
  public static ulong UMulHi128(ulong left, ulong right)
  {
      ulong leftHi = left >> 32;
      ulong leftLo = left & 0xFFFFFFFF;
      ulong rightHi = right >> 32;
      ulong rightLo = right & 0xFFFFFFFF;

      ulong lowProduct = leftLo * rightLo;

      // First cross term plus the carry out of the low product.
      ulong crossA = leftHi * rightLo + (lowProduct >> 32);

      // Second cross term added to the low half of the first; its upper half is another carry.
      ulong crossB = (crossA & 0xFFFFFFFF) + leftLo * rightHi;

      return leftHi * rightHi + (crossA >> 32) + (crossB >> 32);
  }
  975. #endregion
  976. }
  977. }