SoftFallback.cs 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199
  1. using ChocolArm64.State;
  2. using ChocolArm64.Translation;
  3. using System;
  4. using System.Runtime.CompilerServices;
  5. using System.Runtime.Intrinsics;
  6. using System.Runtime.Intrinsics.X86;
  7. namespace ChocolArm64.Instructions
  8. {
  9. using static VectorHelper;
  10. static class SoftFallback
  11. {
  /// <summary>
  /// Forwards to <c>context.EmitCall</c>, targeting the <see cref="SoftFallback"/> member
  /// whose name is <paramref name="mthdName"/>.
  /// </summary>
  /// <param name="context">Emitter context that receives the call.</param>
  /// <param name="mthdName">Name of the <see cref="SoftFallback"/> method to call.</param>
  public static void EmitCall(ILEmitterCtx context, string mthdName)
      => context.EmitCall(typeof(SoftFallback), mthdName);
  16. #region "ShlReg"
  /// <summary>
  /// Shifts a signed element by a register-supplied amount: left for a positive amount,
  /// right (via <c>SignedShrReg</c>) for a negative one.
  /// </summary>
  /// <param name="value">Element to shift.</param>
  /// <param name="shift">Shift operand; only its low byte, read as a signed value, is used.</param>
  /// <param name="round">Passed through to <c>SignedShrReg</c>; ignored for left shifts.</param>
  /// <param name="size">Element size selector; the element is <c>8 &lt;&lt; size</c> bits wide.</param>
  /// <returns>The shifted value; 0 when a left shift is at least as wide as the element.</returns>
  public static long SignedShlReg(long value, long shift, bool round, int size)
  {
      int elementBits = 8 << size;
      int amount = (sbyte)shift;

      if (amount == 0)
      {
          return value;
      }

      if (amount < 0)
      {
          // A negative amount means "shift right by its magnitude".
          return SignedShrReg(value, -amount, round, elementBits);
      }

      return amount >= elementBits ? 0L : value << amount;
  }
  /// <summary>
  /// Shifts an unsigned element by a register-supplied amount: left for a positive amount,
  /// right (via <c>UnsignedShrReg</c>) for a negative one.
  /// </summary>
  /// <param name="value">Element to shift.</param>
  /// <param name="shift">Shift operand; only its low byte, read as a signed value, is used.</param>
  /// <param name="round">Passed through to <c>UnsignedShrReg</c>; ignored for left shifts.</param>
  /// <param name="size">Element size selector; the element is <c>8 &lt;&lt; size</c> bits wide.</param>
  /// <returns>The shifted value; 0 when a left shift is at least as wide as the element.</returns>
  public static ulong UnsignedShlReg(ulong value, ulong shift, bool round, int size)
  {
      int elementBits = 8 << size;
      int amount = (sbyte)shift;

      if (amount == 0)
      {
          return value;
      }

      if (amount < 0)
      {
          // A negative amount means "shift right by its magnitude".
          return UnsignedShrReg(value, -amount, round, elementBits);
      }

      return amount >= elementBits ? 0UL : value << amount;
  }
  /// <summary>
  /// Saturating variant of <c>SignedShlReg</c>: a left shift that loses bits is replaced by
  /// the saturated value produced by <c>SignedSignSatQ</c> / <c>SignedSrcSignedDstSatQ</c>.
  /// </summary>
  /// <param name="value">Element to shift.</param>
  /// <param name="shift">Shift operand; only its low byte, read as a signed value, is used.</param>
  /// <param name="round">Passed through to <c>SignedShrReg</c> for negative amounts.</param>
  /// <param name="size">Element size selector; the element is <c>8 &lt;&lt; size</c> bits wide.</param>
  /// <param name="state">Thread state handed to the saturation helpers.</param>
  /// <returns>The shifted, possibly saturated, value.</returns>
  public static long SignedShlRegSatQ(long value, long shift, bool round, int size, CpuThreadState state)
  {
      int elementBits = 8 << size;
      int amount = (sbyte)shift;

      if (amount == 0)
      {
          return value;
      }

      if (amount < 0)
      {
          return SignedShrReg(value, -amount, round, elementBits);
      }

      if (amount >= elementBits)
      {
          // Every significant bit is shifted out: the result depends only on the sign.
          return SignedSignSatQ(value, elementBits, state);
      }

      if (elementBits != 64)
      {
          // Narrow elements: the shifted value is range-checked by the saturation helper.
          return SignedSrcSignedDstSatQ(value << amount, size, state);
      }

      // 64-bit elements: detect lost bits by shifting back and comparing.
      long shifted = value << amount;
      bool lossless = (shifted >> amount) == value;

      return lossless ? shifted : SignedSignSatQ(value, elementBits, state);
  }
  /// <summary>
  /// Saturating variant of <c>UnsignedShlReg</c>: a left shift that loses bits is replaced by
  /// the saturated value produced by <c>UnsignedSignSatQ</c> / <c>UnsignedSrcUnsignedDstSatQ</c>.
  /// </summary>
  /// <param name="value">Element to shift.</param>
  /// <param name="shift">Shift operand; only its low byte, read as a signed value, is used.</param>
  /// <param name="round">Passed through to <c>UnsignedShrReg</c> for negative amounts.</param>
  /// <param name="size">Element size selector; the element is <c>8 &lt;&lt; size</c> bits wide.</param>
  /// <param name="state">Thread state handed to the saturation helpers.</param>
  /// <returns>The shifted, possibly saturated, value.</returns>
  public static ulong UnsignedShlRegSatQ(ulong value, ulong shift, bool round, int size, CpuThreadState state)
  {
      int elementBits = 8 << size;
      int amount = (sbyte)shift;

      if (amount == 0)
      {
          return value;
      }

      if (amount < 0)
      {
          return UnsignedShrReg(value, -amount, round, elementBits);
      }

      if (amount >= elementBits)
      {
          // Every significant bit is shifted out: any non-zero input saturates.
          return UnsignedSignSatQ(value, elementBits, state);
      }

      if (elementBits != 64)
      {
          // Narrow elements: the shifted value is range-checked by the saturation helper.
          return UnsignedSrcUnsignedDstSatQ(value << amount, size, state);
      }

      // 64-bit elements: detect lost bits by shifting back and comparing.
      ulong shifted = value << amount;
      bool lossless = (shifted >> amount) == value;

      return lossless ? shifted : UnsignedSignSatQ(value, elementBits, state);
  }
  /// <summary>
  /// Arithmetic right shift of a signed element, optionally rounding.
  /// </summary>
  /// <param name="value">Element to shift.</param>
  /// <param name="shift">Shift amount; expected in [1, 128].</param>
  /// <param name="round">When true, <c>1 &lt;&lt; (shift - 1)</c> is added before shifting.</param>
  /// <param name="eSize">Element width in bits; expected to be 8, 16, 32 or 64.</param>
  /// <returns>
  /// The shifted value. For shifts of at least <paramref name="eSize"/>: 0 when rounding,
  /// otherwise -1 for negative inputs and 0 for non-negative ones.
  /// </returns>
  private static long SignedShrReg(long value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}.
  {
      bool shiftsEverythingOut = shift >= eSize;

      if (!round)
      {
          if (shiftsEverythingOut)
          {
              return value < 0L ? -1L : 0L;
          }

          return value >> shift;
      }

      if (shiftsEverythingOut)
      {
          return 0L;
      }

      long bias = 1L << (shift - 1);
      long biased = value + bias;

      // Only a 64-bit element can overflow the addition; when it does, the sum is
      // shifted as an unsigned quantity instead.
      bool wrapped = eSize == 64 && (~value & (value ^ biased)) < 0L;

      return wrapped ? (long)((ulong)biased >> shift) : biased >> shift;
  }
  /// <summary>
  /// Logical right shift of an unsigned element, optionally rounding.
  /// </summary>
  /// <param name="value">Element to shift.</param>
  /// <param name="shift">Shift amount; expected in [1, 128].</param>
  /// <param name="round">When true, <c>1 &lt;&lt; (shift - 1)</c> is added before shifting.</param>
  /// <param name="eSize">Element width in bits; expected to be 8, 16, 32 or 64.</param>
  /// <returns>The shifted value, including the carry of the rounding addition for 64-bit elements.</returns>
  private static ulong UnsignedShrReg(ulong value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}.
  {
      if (!round)
      {
          return shift >= eSize ? 0UL : value >> shift;
      }

      if (shift > 64)
      {
          return 0UL;
      }

      ulong bias = 1UL << (shift - 1);
      ulong biased = value + bias;

      // The rounding addition can only carry out of bit 63 for 64-bit elements.
      bool carried = eSize == 64 && biased < value && biased < bias;

      if (shift == 64)
      {
          // Only the carry bit survives a full-width shift.
          return carried ? 1UL : 0UL;
      }

      ulong quotient = biased >> shift;

      // Re-insert the lost carry at the position it lands on after the shift.
      return carried ? quotient | (0x8000000000000000UL >> (shift - 1)) : quotient;
  }
  /// <summary>
  /// Saturates purely by the sign of <paramref name="op"/>: positive inputs yield the largest
  /// signed <paramref name="eSize"/>-bit value, negative inputs the smallest, zero stays zero.
  /// </summary>
  /// <param name="op">Value whose sign selects the result.</param>
  /// <param name="eSize">Element width in bits; expected to be 8, 16, 32 or 64.</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> whenever <paramref name="op"/> is non-zero.</param>
  /// <returns>The saturated value, or 0 for a zero input.</returns>
  private static long SignedSignSatQ(long op, int eSize, CpuThreadState state) // eSize := {8, 16, 32, 64}.
  {
      if (op == 0L)
      {
          return 0L;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      long signBit = 1L << (eSize - 1);

      return op > 0L ? signBit - 1L : -signBit;
  }
  /// <summary>
  /// Saturates any non-zero input to the largest unsigned <paramref name="eSize"/>-bit value.
  /// </summary>
  /// <param name="op">Value to test against zero.</param>
  /// <param name="eSize">Element width in bits; expected to be 8, 16, 32 or 64.</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> whenever <paramref name="op"/> is non-zero.</param>
  /// <returns>The all-ones element value, or 0 for a zero input.</returns>
  private static ulong UnsignedSignSatQ(ulong op, int eSize, CpuThreadState state) // eSize := {8, 16, 32, 64}.
  {
      if (op == 0UL)
      {
          return 0UL;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return ulong.MaxValue >> (64 - eSize);
  }
  254. #endregion
  255. #region "ShrImm64"
  /// <summary>
  /// Arithmetic right shift of a 64-bit signed value by an immediate, with optional rounding.
  /// </summary>
  /// <param name="value">Value to shift.</param>
  /// <param name="roundConst">0 for no rounding; otherwise the rounding constant added before
  /// the shift (callers are expected to pass <c>1L &lt;&lt; (shift - 1)</c>).</param>
  /// <param name="shift">Shift amount; values above 63 are treated as a full-width shift.</param>
  /// <returns>The shifted value.</returns>
  public static long SignedShrImm64(long value, long roundConst, int shift)
  {
      bool fullWidth = shift > 63;

      if (roundConst == 0L)
      {
          if (!fullWidth)
          {
              return value >> shift;
          }

          // Only the sign is left after a full-width arithmetic shift.
          return value < 0L ? -1L : 0L;
      }

      if (fullWidth)
      {
          return 0L;
      }

      long biased = value + roundConst;

      // If the rounding addition overflowed, shift the sum as an unsigned quantity.
      bool wrapped = (~value & (value ^ biased)) < 0L;

      return wrapped ? (long)((ulong)biased >> shift) : biased >> shift;
  }
  /// <summary>
  /// Logical right shift of a 64-bit unsigned value by an immediate, with optional rounding.
  /// </summary>
  /// <param name="value">Value to shift.</param>
  /// <param name="roundConst">0 for no rounding; otherwise the rounding constant added before
  /// the shift (callers are expected to pass <c>1L &lt;&lt; (shift - 1)</c>).</param>
  /// <param name="shift">Shift amount; values above 63 are treated as a full-width shift.</param>
  /// <returns>The shifted value, including the carry of the rounding addition.</returns>
  public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
  {
      bool fullWidth = shift > 63;

      if (roundConst == 0L)
      {
          return fullWidth ? 0UL : value >> shift;
      }

      ulong bias = (ulong)roundConst;
      ulong biased = value + bias;
      bool carried = biased < value && biased < bias;

      if (fullWidth)
      {
          // Only the carry bit survives a full-width shift.
          return carried ? 1UL : 0UL;
      }

      ulong quotient = biased >> shift;

      // Re-insert the lost carry at the position it lands on after the shift.
      return carried ? quotient | (0x8000000000000000UL >> (shift - 1)) : quotient;
  }
  336. #endregion
  337. #region "Saturating"
  /// <summary>
  /// Clamps a signed value to the signed range of an <c>8 &lt;&lt; size</c>-bit element.
  /// </summary>
  /// <param name="op">Value to clamp.</param>
  /// <param name="size">Destination element size selector.</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> when clamping occurs.</param>
  /// <returns><paramref name="op"/> if it fits, otherwise the nearest range limit.</returns>
  public static long SignedSrcSignedDstSatQ(long op, int size, CpuThreadState state)
  {
      long signBit = 1L << ((8 << size) - 1);
      long upper = signBit - 1L;
      long lower = -signBit;

      if (op >= lower && op <= upper)
      {
          return op;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return op > upper ? upper : lower;
  }
  /// <summary>
  /// Clamps a signed value to the unsigned range of an <c>8 &lt;&lt; size</c>-bit element.
  /// </summary>
  /// <remarks>
  /// The upper limit is built with a right shift of <c>ulong.MaxValue</c> rather than
  /// <c>(1UL &lt;&lt; eSize) - 1</c>: C# masks the shift count to six bits, so the latter
  /// evaluates to 0 for a 64-bit element. The comparison against the limit is done on
  /// unsigned values for the same reason (the 64-bit limit does not fit in a <c>long</c>).
  /// Results for 8-, 16- and 32-bit elements are unchanged.
  /// </remarks>
  /// <param name="op">Value to clamp.</param>
  /// <param name="size">Destination element size selector (0..3).</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> when clamping occurs.</param>
  /// <returns>0 for negative inputs, the element maximum for too-large inputs, otherwise <paramref name="op"/>.</returns>
  public static ulong SignedSrcUnsignedDstSatQ(long op, int size, CpuThreadState state)
  {
      int eSize = 8 << size;

      ulong tMaxValue = ulong.MaxValue >> (64 - eSize);
      ulong tMinValue = 0UL;

      if (op < 0L)
      {
          state.SetFpsrFlag(Fpsr.Qc);

          return tMinValue;
      }

      // op is non-negative here, so the unsigned reinterpretation is exact.
      if ((ulong)op > tMaxValue)
      {
          state.SetFpsrFlag(Fpsr.Qc);

          return tMaxValue;
      }

      return (ulong)op;
  }
  /// <summary>
  /// Clamps an unsigned value to the largest signed value of an <c>8 &lt;&lt; size</c>-bit element.
  /// </summary>
  /// <param name="op">Value to clamp.</param>
  /// <param name="size">Destination element size selector.</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> when clamping occurs.</param>
  /// <returns><paramref name="op"/> if it fits, otherwise the signed maximum.</returns>
  public static long UnsignedSrcSignedDstSatQ(ulong op, int size, CpuThreadState state)
  {
      long upper = (1L << ((8 << size) - 1)) - 1L;

      if (op <= (ulong)upper)
      {
          return (long)op;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return upper;
  }
  /// <summary>
  /// Clamps an unsigned value to the unsigned range of an <c>8 &lt;&lt; size</c>-bit element.
  /// </summary>
  /// <remarks>
  /// The upper limit is built with a right shift of <c>ulong.MaxValue</c> rather than
  /// <c>(1UL &lt;&lt; eSize) - 1</c>: C# masks the shift count to six bits, so the latter
  /// evaluates to 0 for a 64-bit element and every non-zero input would saturate to 0.
  /// Results for 8-, 16- and 32-bit elements are unchanged.
  /// </remarks>
  /// <param name="op">Value to clamp.</param>
  /// <param name="size">Destination element size selector (0..3).</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> when clamping occurs.</param>
  /// <returns><paramref name="op"/> if it fits, otherwise the element maximum.</returns>
  public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size, CpuThreadState state)
  {
      int eSize = 8 << size;

      ulong tMaxValue = ulong.MaxValue >> (64 - eSize);

      if (op > tMaxValue)
      {
          state.SetFpsrFlag(Fpsr.Qc);

          return tMaxValue;
      }

      return op;
  }
  /// <summary>
  /// Replaces <c>long.MinValue</c> with <c>long.MaxValue</c> and passes every other value through.
  /// </summary>
  /// <param name="op">Value to check.</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> when the replacement happens.</param>
  /// <returns><c>long.MaxValue</c> for <c>long.MinValue</c>, otherwise <paramref name="op"/>.</returns>
  public static long UnarySignedSatQAbsOrNeg(long op, CpuThreadState state)
  {
      if (op != long.MinValue)
      {
          return op;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return long.MaxValue;
  }
  /// <summary>
  /// Saturating 64-bit signed addition.
  /// </summary>
  /// <param name="op1">First addend.</param>
  /// <param name="op2">Second addend.</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> on overflow.</param>
  /// <returns>The sum, or <c>long.MinValue</c> / <c>long.MaxValue</c> (by the sign of
  /// <paramref name="op1"/>) when the addition overflows.</returns>
  public static long BinarySignedSatQAdd(long op1, long op2, CpuThreadState state)
  {
      long sum = op1 + op2;

      // Overflow: both operands share a sign and the sum's sign differs from it.
      bool overflowed = (~(op1 ^ op2) & (op1 ^ sum)) < 0L;

      if (!overflowed)
      {
          return sum;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return op1 < 0L ? long.MinValue : long.MaxValue;
  }
  /// <summary>
  /// Saturating 64-bit unsigned addition.
  /// </summary>
  /// <param name="op1">First addend.</param>
  /// <param name="op2">Second addend.</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> on overflow.</param>
  /// <returns>The sum, or <c>ulong.MaxValue</c> when the addition wraps.</returns>
  public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2, CpuThreadState state)
  {
      ulong sum = op1 + op2;

      // A wrapped sum is smaller than both operands.
      bool carried = sum < op1 && sum < op2;

      if (!carried)
      {
          return sum;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return ulong.MaxValue;
  }
  /// <summary>
  /// Saturating 64-bit signed subtraction.
  /// </summary>
  /// <param name="op1">Minuend.</param>
  /// <param name="op2">Subtrahend.</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> on overflow.</param>
  /// <returns>The difference, or <c>long.MinValue</c> / <c>long.MaxValue</c> (by the sign of
  /// <paramref name="op1"/>) when the subtraction overflows.</returns>
  public static long BinarySignedSatQSub(long op1, long op2, CpuThreadState state)
  {
      long difference = op1 - op2;

      // Overflow: the operands differ in sign and the result's sign differs from op1's.
      bool overflowed = ((op1 ^ op2) & (op1 ^ difference)) < 0L;

      if (!overflowed)
      {
          return difference;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return op1 < 0L ? long.MinValue : long.MaxValue;
  }
  /// <summary>
  /// Saturating 64-bit unsigned subtraction.
  /// </summary>
  /// <param name="op1">Minuend.</param>
  /// <param name="op2">Subtrahend.</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> when the result would be negative.</param>
  /// <returns>The difference, or <c>ulong.MinValue</c> when <paramref name="op1"/> is smaller
  /// than <paramref name="op2"/>.</returns>
  public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2, CpuThreadState state)
  {
      if (op1 >= op2)
      {
          return op1 - op2;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return ulong.MinValue;
  }
  /// <summary>
  /// Adds an unsigned 64-bit value to a signed 64-bit value and saturates the result to
  /// <c>long.MaxValue</c> when it does not fit in a signed 64-bit integer.
  /// </summary>
  /// <param name="op1">Unsigned addend.</param>
  /// <param name="op2">Signed addend.</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> on saturation.</param>
  /// <returns>The sum, or <c>long.MaxValue</c> when it is too large.</returns>
  public static long BinarySignedSatQAcc(ulong op1, long op2, CpuThreadState state)
  {
      const ulong SignedMax = (ulong)long.MaxValue;

      if (op1 <= SignedMax)
      {
          // op1 fits in a long: an ordinary signed add, which can only overflow upwards
          // (op2 non-negative and the sum turning negative).
          long sum = (long)op1 + op2;

          if ((~op2 & sum) >= 0L)
          {
              return sum;
          }
      }
      else if (op2 < 0L)
      {
          // op1 is above long.MaxValue and op2 is negative: the modular unsigned add gives
          // the exact sum, which may or may not have dropped into the signed range.
          ulong sum = op1 + (ulong)op2;

          if (sum <= SignedMax)
          {
              return (long)sum;
          }
      }

      // Remaining cases (including op1 above long.MaxValue with non-negative op2) saturate.
      state.SetFpsrFlag(Fpsr.Qc);

      return long.MaxValue;
  }
  /// <summary>
  /// Adds a signed 64-bit value to an unsigned 64-bit value and saturates the result to the
  /// unsigned 64-bit range.
  /// </summary>
  /// <param name="op1">Signed addend.</param>
  /// <param name="op2">Unsigned addend.</param>
  /// <param name="state">Receives <c>SetFpsrFlag(Fpsr.Qc)</c> on saturation.</param>
  /// <returns>The sum, <c>ulong.MaxValue</c> when it is too large, or <c>ulong.MinValue</c>
  /// when it would be negative.</returns>
  public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2, CpuThreadState state)
  {
      if (op1 >= 0L)
      {
          // Both operands are non-negative: a plain unsigned add that may wrap upwards.
          ulong sum = (ulong)op1 + op2;
          bool carried = sum < (ulong)op1 && sum < op2;

          if (!carried)
          {
              return sum;
          }

          state.SetFpsrFlag(Fpsr.Qc);

          return ulong.MaxValue;
      }

      if (op2 > (ulong)long.MaxValue)
      {
          // op1 is negative and op2 exceeds long.MaxValue: the modular add is the exact,
          // non-negative result.
          return (ulong)op1 + op2;
      }

      // op1 is negative and op2 fits in a long: do the add in the signed domain.
      long signedSum = op1 + (long)op2;

      if (signedSum >= (long)ulong.MinValue)
      {
          return (ulong)signedSum;
      }

      state.SetFpsrFlag(Fpsr.Qc);

      return ulong.MinValue;
  }
  563. #endregion
  564. #region "Count"
  /// <summary>
  /// Counts how many bits directly below the top bit of a <paramref name="size"/>-bit value
  /// are equal to that top bit.
  /// </summary>
  /// <param name="value">Value to inspect; only its low <paramref name="size"/> bits are examined.</param>
  /// <param name="size">Width in bits: 8, 16, 32 or 64.</param>
  /// <returns>A count in [0, <paramref name="size"/> - 1].</returns>
  public static ulong CountLeadingSigns(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
  {
      // A set bit marks a position whose neighbour above it holds a different value.
      ulong transitions = value ^ (value >> 1);

      int top = size - 2;

      for (int run = 0; run <= top; run++)
      {
          if (((transitions >> (top - run)) & 1UL) != 0UL)
          {
              return (ulong)run;
          }
      }

      return (ulong)(size - 1);
  }
  // Number of leading zero bits in each 4-bit value (index = nibble).
  private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };

  /// <summary>
  /// Counts the leading zero bits of a <paramref name="size"/>-bit value, one nibble at a time.
  /// </summary>
  /// <param name="value">Value to inspect; expected to have no bits set at or above <paramref name="size"/>.</param>
  /// <param name="size">Width in bits: 8, 16, 32 or 64.</param>
  /// <returns>The number of leading zeros; <paramref name="size"/> for a zero input.</returns>
  public static ulong CountLeadingZeros(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
  {
      if (value == 0ul)
      {
          return (ulong)size;
      }

      int total = 0;

      // Walk nibbles from the top; the first one that is not all-zero ends the count.
      for (int shift = size - 4; ; shift -= 4)
      {
          int zeros = ClzNibbleTbl[(value >> shift) & 0b1111];

          total += zeros;

          if (zeros != 4)
          {
              return (ulong)total;
          }
      }
  }
  /// <summary>
  /// Counts the set bits in the low byte of <paramref name="value"/>.
  /// </summary>
  /// <param name="value">Value to inspect; bits above bit 7 do not affect the result.</param>
  /// <returns>A count in [0, 8].</returns>
  public static ulong CountSetBits8(ulong value) // "size" is 8 (SIMD&FP Inst.).
  {
      // Sum neighbouring bits into 2-bit fields, then 2-bit fields into nibbles,
      // then add the two nibbles.
      ulong pairs = (value & 0x55ul) + ((value >> 1) & 0x55ul);
      ulong quads = (pairs & 0x33ul) + ((pairs >> 2) & 0x33ul);

      return (quads & 0x0ful) + (quads >> 4);
  }
  606. #endregion
  607. #region "Crc32"
  // Bit-reversed generator polynomials, used with the right-shifting update in Crc32.
  private const uint Crc32RevPoly = 0xedb88320;
  private const uint Crc32CRevPoly = 0x82f63b78;

  // Public entry points: one per polynomial and operand width (byte, half, word, doubleword).
  public static uint Crc32B(uint crc, byte val) => Crc32(crc, Crc32RevPoly, val);
  public static uint Crc32H(uint crc, ushort val) => Crc32H(crc, Crc32RevPoly, val);
  public static uint Crc32W(uint crc, uint val) => Crc32W(crc, Crc32RevPoly, val);
  public static uint Crc32X(uint crc, ulong val) => Crc32X(crc, Crc32RevPoly, val);

  public static uint Crc32Cb(uint crc, byte val) => Crc32(crc, Crc32CRevPoly, val);
  public static uint Crc32Ch(uint crc, ushort val) => Crc32H(crc, Crc32CRevPoly, val);
  public static uint Crc32Cw(uint crc, uint val) => Crc32W(crc, Crc32CRevPoly, val);
  public static uint Crc32Cx(uint crc, ulong val) => Crc32X(crc, Crc32CRevPoly, val);

  /// <summary>Folds the two bytes of <paramref name="val"/> into the CRC, lowest byte first.</summary>
  private static uint Crc32H(uint crc, uint poly, ushort val)
  {
      for (int shift = 0; shift < 16; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  /// <summary>Folds the four bytes of <paramref name="val"/> into the CRC, lowest byte first.</summary>
  private static uint Crc32W(uint crc, uint poly, uint val)
  {
      for (int shift = 0; shift < 32; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  /// <summary>Folds the eight bytes of <paramref name="val"/> into the CRC, lowest byte first.</summary>
  private static uint Crc32X(uint crc, uint poly, ulong val)
  {
      for (int shift = 0; shift < 64; shift += 8)
      {
          crc = Crc32(crc, poly, (byte)(val >> shift));
      }

      return crc;
  }

  /// <summary>
  /// Folds one byte into the CRC: XOR it into the low bits, then run eight shift steps,
  /// XOR-ing in <paramref name="poly"/> whenever the bit shifted out is set.
  /// </summary>
  private static uint Crc32(uint crc, uint poly, byte val)
  {
      crc ^= val;

      for (int step = 0; step < 8; step++)
      {
          bool lowBitSet = (crc & 1) != 0;

          crc >>= 1;

          if (lowBitSet)
          {
              crc ^= poly;
          }
      }

      return crc;
  }
  654. #endregion
  655. #region "Aes"
  /// <summary>
  /// XORs <paramref name="value"/> with <paramref name="roundKey"/>, then applies
  /// <c>CryptoHelper.AesInvShiftRows</c> followed by <c>CryptoHelper.AesInvSubBytes</c>.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> Decrypt(Vector128<float> value, Vector128<float> roundKey)
  {
      if (!Sse.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      Vector128<float> keyed = Sse.Xor(value, roundKey);
      Vector128<float> unshifted = CryptoHelper.AesInvShiftRows(keyed);

      return CryptoHelper.AesInvSubBytes(unshifted);
  }
  /// <summary>
  /// XORs <paramref name="value"/> with <paramref name="roundKey"/>, then applies
  /// <c>CryptoHelper.AesShiftRows</c> followed by <c>CryptoHelper.AesSubBytes</c>.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> Encrypt(Vector128<float> value, Vector128<float> roundKey)
  {
      if (!Sse.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      Vector128<float> keyed = Sse.Xor(value, roundKey);
      Vector128<float> shifted = CryptoHelper.AesShiftRows(keyed);

      return CryptoHelper.AesSubBytes(shifted);
  }
  /// <summary>Delegates to <c>CryptoHelper.AesInvMixColumns</c>.</summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> InverseMixColumns(Vector128<float> value)
      => CryptoHelper.AesInvMixColumns(value);
  /// <summary>Delegates to <c>CryptoHelper.AesMixColumns</c>.</summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> MixColumns(Vector128<float> value)
      => CryptoHelper.AesMixColumns(value);
  684. #endregion
  685. #region "Sha1"
  /// <summary>
  /// Runs four SHA-1 style rounds over the state (<paramref name="hash_abcd"/>,
  /// <paramref name="hash_e"/>) using <c>ShaChoose</c> as the round function and one
  /// element of <paramref name="wk"/> per round.
  /// </summary>
  /// <param name="hash_abcd">Four state words, read and written as elements 0..3 with size selector 2.</param>
  /// <param name="hash_e">Fifth state word.</param>
  /// <param name="wk">Four per-round addends, element <c>n</c> used in round <c>n</c>.</param>
  /// <returns>The updated four-word state vector.</returns>
  public static Vector128<float> HashChoose(Vector128<float> hash_abcd, uint hash_e, Vector128<float> wk)
  {
      for (int round = 0; round < 4; round++)
      {
          uint a = (uint)VectorExtractIntZx(hash_abcd, (byte)0, 2);
          uint b = (uint)VectorExtractIntZx(hash_abcd, (byte)1, 2);
          uint c = (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2);
          uint d = (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2);
          uint addend = (uint)VectorExtractIntZx(wk, (byte)round, 2);

          hash_e += a.Rol(5) + ShaChoose(b, c, d);
          hash_e += addend;

          hash_abcd = VectorInsertInt((ulong)b.Rol(30), hash_abcd, (byte)1, 2);

          // Rotate the five words one position.
          Rol32_160(ref hash_e, ref hash_abcd);
      }

      return hash_abcd;
  }
  /// <summary>Rotates <paramref name="hash_e"/> left by 30 bits.</summary>
  public static uint FixedRotate(uint hash_e) => Rol(hash_e, 30);
  /// <summary>
  /// Runs four SHA-1 style rounds over the state (<paramref name="hash_abcd"/>,
  /// <paramref name="hash_e"/>) using <c>ShaMajority</c> as the round function and one
  /// element of <paramref name="wk"/> per round.
  /// </summary>
  /// <param name="hash_abcd">Four state words, read and written as elements 0..3 with size selector 2.</param>
  /// <param name="hash_e">Fifth state word.</param>
  /// <param name="wk">Four per-round addends, element <c>n</c> used in round <c>n</c>.</param>
  /// <returns>The updated four-word state vector.</returns>
  public static Vector128<float> HashMajority(Vector128<float> hash_abcd, uint hash_e, Vector128<float> wk)
  {
      for (int round = 0; round < 4; round++)
      {
          uint a = (uint)VectorExtractIntZx(hash_abcd, (byte)0, 2);
          uint b = (uint)VectorExtractIntZx(hash_abcd, (byte)1, 2);
          uint c = (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2);
          uint d = (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2);
          uint addend = (uint)VectorExtractIntZx(wk, (byte)round, 2);

          hash_e += a.Rol(5) + ShaMajority(b, c, d);
          hash_e += addend;

          hash_abcd = VectorInsertInt((ulong)b.Rol(30), hash_abcd, (byte)1, 2);

          // Rotate the five words one position.
          Rol32_160(ref hash_e, ref hash_abcd);
      }

      return hash_abcd;
  }
  /// <summary>
  /// Runs four SHA-1 style rounds over the state (<paramref name="hash_abcd"/>,
  /// <paramref name="hash_e"/>) using <c>ShaParity</c> as the round function and one
  /// element of <paramref name="wk"/> per round.
  /// </summary>
  /// <param name="hash_abcd">Four state words, read and written as elements 0..3 with size selector 2.</param>
  /// <param name="hash_e">Fifth state word.</param>
  /// <param name="wk">Four per-round addends, element <c>n</c> used in round <c>n</c>.</param>
  /// <returns>The updated four-word state vector.</returns>
  public static Vector128<float> HashParity(Vector128<float> hash_abcd, uint hash_e, Vector128<float> wk)
  {
      for (int round = 0; round < 4; round++)
      {
          uint a = (uint)VectorExtractIntZx(hash_abcd, (byte)0, 2);
          uint b = (uint)VectorExtractIntZx(hash_abcd, (byte)1, 2);
          uint c = (uint)VectorExtractIntZx(hash_abcd, (byte)2, 2);
          uint d = (uint)VectorExtractIntZx(hash_abcd, (byte)3, 2);
          uint addend = (uint)VectorExtractIntZx(wk, (byte)round, 2);

          hash_e += a.Rol(5) + ShaParity(b, c, d);
          hash_e += addend;

          hash_abcd = VectorInsertInt((ulong)b.Rol(30), hash_abcd, (byte)1, 2);

          // Rotate the five words one position.
          Rol32_160(ref hash_e, ref hash_abcd);
      }

      return hash_abcd;
  }
  /// <summary>
  /// Builds a vector from element 1 of <paramref name="w0_3"/> and element 0 of
  /// <paramref name="w4_7"/> (both read with size selector 3) and XORs it with
  /// <paramref name="w0_3"/> and <paramref name="w8_11"/>.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
  public static Vector128<float> Sha1SchedulePart1(Vector128<float> w0_3, Vector128<float> w4_7, Vector128<float> w8_11)
  {
      if (!Sse.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      ulong upperOfFirst = VectorExtractIntZx(w0_3, (byte)1, 3);
      ulong lowerOfSecond = VectorExtractIntZx(w4_7, (byte)0, 3);

      Vector128<float> spliced = new Vector128<float>();

      spliced = VectorInsertInt(upperOfFirst, spliced, (byte)0, 3);
      spliced = VectorInsertInt(lowerOfSecond, spliced, (byte)1, 3);

      Vector128<float> outer = Sse.Xor(w0_3, w8_11);

      return Sse.Xor(spliced, outer);
  }
  /// <summary>
  /// XORs <paramref name="tw0_3"/> with <paramref name="w12_15"/> shifted right by four bytes,
  /// rotates each resulting element left by one bit, and additionally XORs element 3 with
  /// element 0 rotated left by two bits.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE2 is not available.</exception>
  public static Vector128<float> Sha1SchedulePart2(Vector128<float> tw0_3, Vector128<float> w12_15)
  {
      if (!Sse2.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      Vector128<uint> shiftedDown = Sse2.ShiftRightLogical128BitLane(Sse.StaticCast<float, uint>(w12_15), (byte)4);
      Vector128<float> mixed = Sse.Xor(tw0_3, Sse.StaticCast<uint, float>(shiftedDown));

      uint m0 = (uint)VectorExtractIntZx(mixed, (byte)0, 2);
      uint m1 = (uint)VectorExtractIntZx(mixed, (byte)1, 2);
      uint m2 = (uint)VectorExtractIntZx(mixed, (byte)2, 2);
      uint m3 = (uint)VectorExtractIntZx(mixed, (byte)3, 2);

      Vector128<float> schedule = new Vector128<float>();

      schedule = VectorInsertInt((ulong)m0.Rol(1), schedule, (byte)0, 2);
      schedule = VectorInsertInt((ulong)m1.Rol(1), schedule, (byte)1, 2);
      schedule = VectorInsertInt((ulong)m2.Rol(1), schedule, (byte)2, 2);

      uint last = m3.Rol(1) ^ m0.Rol(2);

      return VectorInsertInt((ulong)last, schedule, (byte)3, 2);
  }
  /// <summary>
  /// Rotates the five-word sequence formed by <paramref name="x"/> and <paramref name="y"/>:
  /// <paramref name="x"/> is shifted up by one element, the old <paramref name="y"/> enters
  /// as element 0, and the element pushed out of the top becomes the new <paramref name="y"/>.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE2 is not available.</exception>
  private static void Rol32_160(ref uint y, ref Vector128<float> x)
  {
      if (!Sse2.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      uint pushedOut = (uint)VectorExtractIntZx(x, (byte)3, 2);

      Vector128<uint> shiftedUp = Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(x), (byte)4);

      x = VectorInsertInt((ulong)y, Sse.StaticCast<uint, float>(shiftedUp), (byte)0, 2);
      y = pushedOut;
  }
  /// <summary>
  /// Bitwise select: each result bit comes from <paramref name="y"/> where
  /// <paramref name="x"/> is 1 and from <paramref name="z"/> where it is 0.
  /// </summary>
  private static uint ShaChoose(uint x, uint y, uint z) => (x & y) | (~x & z);
  /// <summary>
  /// Bitwise majority: each result bit is 1 when at least two of the three inputs have
  /// that bit set.
  /// </summary>
  private static uint ShaMajority(uint x, uint y, uint z) => (x & y) | (x & z) | (y & z);
  /// <summary>Bitwise XOR of the three inputs.</summary>
  private static uint ShaParity(uint x, uint y, uint z) => (x ^ y) ^ z;
  /// <summary>Rotates a 32-bit value left by <paramref name="count"/> bits.</summary>
  private static uint Rol(this uint value, int count)
  {
      uint shiftedOut = value >> (32 - count);
      uint shiftedUp = value << count;

      return shiftedUp | shiftedOut;
  }
  793. #endregion
  794. #region "Sha256"
  /// <summary>
  /// Runs <c>Sha256Hash</c> and returns the first of its two updated state vectors.
  /// </summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> HashLower(Vector128<float> hash_abcd, Vector128<float> hash_efgh, Vector128<float> wk)
      => Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
  /// <summary>
  /// Runs <c>Sha256Hash</c> and returns the second of its two updated state vectors.
  /// Note the parameter order: <paramref name="hash_efgh"/> comes first here.
  /// </summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static Vector128<float> HashUpper(Vector128<float> hash_efgh, Vector128<float> hash_abcd, Vector128<float> wk)
      => Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
  /// <summary>
  /// For each of the four elements: takes the next element in the sequence
  /// (<paramref name="w0_3"/>[1..3], then <paramref name="w4_7"/>[0]), applies
  /// <c>Ror(7) ^ Ror(18) ^ Lsr(3)</c>, and adds the matching element of <paramref name="w0_3"/>.
  /// </summary>
  /// <returns>A vector holding the four results.</returns>
  public static Vector128<float> Sha256SchedulePart1(Vector128<float> w0_3, Vector128<float> w4_7)
  {
      Vector128<float> schedule = new Vector128<float>();

      for (int lane = 0; lane < 4; lane++)
      {
          // The "next" word lives in w0_3 for lanes 0..2 and wraps into w4_7 for lane 3.
          bool nextIsInFirst = lane <= 2;
          Vector128<float> source = nextIsInFirst ? w0_3 : w4_7;
          int sourceLane = nextIsInFirst ? lane + 1 : 0;

          uint next = (uint)VectorExtractIntZx(source, (byte)sourceLane, 2);
          uint word = next.Ror(7) ^ next.Ror(18) ^ next.Lsr(3);

          word += (uint)VectorExtractIntZx(w0_3, (byte)lane, 2);

          schedule = VectorInsertInt((ulong)word, schedule, (byte)lane, 2);
      }

      return schedule;
  }
  /// <summary>
  /// Produces four elements. Each applies <c>Ror(17) ^ Ror(19) ^ Lsr(10)</c> to a seed word and
  /// adds one element of <paramref name="w0_3"/> plus one element from the sequence
  /// <paramref name="w8_11"/>[1..3], <paramref name="w12_15"/>[0]. Seeds for elements 0 and 1
  /// come from the upper half of <paramref name="w12_15"/>; seeds for elements 2 and 3 are the
  /// freshly computed elements 0 and 1.
  /// </summary>
  /// <returns>A vector holding the four results.</returns>
  public static Vector128<float> Sha256SchedulePart2(Vector128<float> w0_3, Vector128<float> w8_11, Vector128<float> w12_15)
  {
      Vector128<float> schedule = new Vector128<float>();

      ulong seedPair = VectorExtractIntZx(w12_15, (byte)1, 3);

      for (int lane = 0; lane < 4; lane++)
      {
          if (lane == 2)
          {
              // The second half is seeded by the two words just written.
              seedPair = VectorExtractIntZx(schedule, (byte)0, 3);
          }

          uint seed = seedPair.ULongPart(lane & 1);
          uint word = seed.Ror(17) ^ seed.Ror(19) ^ seed.Lsr(10);

          word += (uint)VectorExtractIntZx(w0_3, (byte)lane, 2);
          word += lane < 3
              ? (uint)VectorExtractIntZx(w8_11, (byte)(lane + 1), 2)
              : (uint)VectorExtractIntZx(w12_15, (byte)0, 2);

          schedule = VectorInsertInt((ulong)word, schedule, (byte)lane, 2);
      }

      return schedule;
  }
  /// <summary>
  /// Runs four SHA-256 style rounds over the two state vectors <paramref name="x"/> and
  /// <paramref name="y"/>, consuming one element of <paramref name="w"/> per round.
  /// </summary>
  /// <param name="x">First state vector (elements read with size selector 2).</param>
  /// <param name="y">Second state vector (elements read with size selector 2).</param>
  /// <param name="w">Four per-round addends, element <c>n</c> used in round <c>n</c>.</param>
  /// <param name="part1">Selects which updated vector is returned: <paramref name="x"/> when
  /// true, <paramref name="y"/> when false.</param>
  private static Vector128<float> Sha256Hash(Vector128<float> x, Vector128<float> y, Vector128<float> w, bool part1)
  {
      for (int round = 0; round < 4; round++)
      {
          uint x0 = (uint)VectorExtractIntZx(x, (byte)0, 2);
          uint x1 = (uint)VectorExtractIntZx(x, (byte)1, 2);
          uint x2 = (uint)VectorExtractIntZx(x, (byte)2, 2);
          uint x3 = (uint)VectorExtractIntZx(x, (byte)3, 2);

          uint y0 = (uint)VectorExtractIntZx(y, (byte)0, 2);
          uint y1 = (uint)VectorExtractIntZx(y, (byte)1, 2);
          uint y2 = (uint)VectorExtractIntZx(y, (byte)2, 2);
          uint y3 = (uint)VectorExtractIntZx(y, (byte)3, 2);

          uint choose = ShaChoose(y0, y1, y2);
          uint majority = ShaMajority(x0, x1, x2);

          uint temp = y3;

          temp += ShaHashSigma1(y0) + choose;
          temp += (uint)VectorExtractIntZx(w, (byte)round, 2);

          x = VectorInsertInt((ulong)(temp + x3), x, (byte)3, 2);
          y = VectorInsertInt((ulong)(temp + ShaHashSigma0(x0) + majority), y, (byte)3, 2);

          // Rotate the eight words one position.
          Rol32_256(ref y, ref x);
      }

      return part1 ? x : y;
  }
  /// <summary>
  /// Rotates the eight-word sequence formed by <paramref name="x"/> and <paramref name="y"/>:
  /// both vectors are shifted up by one element and each receives, as element 0, the element
  /// pushed out of the top of the other.
  /// </summary>
  /// <exception cref="PlatformNotSupportedException">SSE2 is not available.</exception>
  private static void Rol32_256(ref Vector128<float> y, ref Vector128<float> x)
  {
      if (!Sse2.IsSupported)
      {
          throw new PlatformNotSupportedException();
      }

      uint topOfY = (uint)VectorExtractIntZx(y, (byte)3, 2);
      uint topOfX = (uint)VectorExtractIntZx(x, (byte)3, 2);

      Vector128<uint> shiftedY = Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(y), (byte)4);
      Vector128<uint> shiftedX = Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(x), (byte)4);

      y = VectorInsertInt((ulong)topOfX, Sse.StaticCast<uint, float>(shiftedY), (byte)0, 2);
      x = VectorInsertInt((ulong)topOfY, Sse.StaticCast<uint, float>(shiftedX), (byte)0, 2);
  }
  /// <summary>XOR of <paramref name="x"/> rotated right by 2, 13 and 22 bits.</summary>
  private static uint ShaHashSigma0(uint x) => Ror(x, 2) ^ Ror(x, 13) ^ Ror(x, 22);
  /// <summary>XOR of <paramref name="x"/> rotated right by 6, 11 and 25 bits.</summary>
  private static uint ShaHashSigma1(uint x) => Ror(x, 6) ^ Ror(x, 11) ^ Ror(x, 25);
  /// <summary>Rotates a 32-bit value right by <paramref name="count"/> bits.</summary>
  private static uint Ror(this uint value, int count)
  {
      uint wrapped = value << (32 - count);
      uint shiftedDown = value >> count;

      return shiftedDown | wrapped;
  }
  /// <summary>Logical right shift of a 32-bit value by <paramref name="count"/> bits.</summary>
  private static uint Lsr(this uint value, int count) => value >> count;
  /// <summary>
  /// Returns one 32-bit half of a 64-bit value: the low half when <paramref name="part"/>
  /// is 0, the high half for any other value.
  /// </summary>
  private static uint ULongPart(this ulong value, int part)
  {
      int shift = part == 0 ? 0 : 32;

      return (uint)((value >> shift) & 0xFFFFFFFFUL);
  }
  896. #endregion
  897. #region "Reverse"
  /// <summary>
  /// Reverses the bit order of the low byte of <paramref name="value"/>.
  /// </summary>
  /// <param name="value">Input; bits above bit 7 do not affect the result.</param>
  /// <returns>The reversed byte in bits 0..7, upper bits zero.</returns>
  public static uint ReverseBits8(uint value)
  {
      // Swap adjacent bits, then adjacent bit pairs, then the two nibbles.
      uint swappedBits = ((value >> 1) & 0x55) | ((value & 0x55) << 1);
      uint swappedPairs = ((swappedBits >> 2) & 0x33) | ((swappedBits & 0x33) << 2);

      return ((swappedPairs & 0x0f) << 4) | (swappedPairs >> 4);
  }
  /// <summary>Reverses the bit order of a 32-bit value.</summary>
  public static uint ReverseBits32(uint value)
  {
      // Swap progressively larger groups: bits, pairs, nibbles, bytes, half-words.
      uint v = value;

      v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
      v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
      v = ((v >> 4) & 0x0f0f0f0f) | ((v & 0x0f0f0f0f) << 4);
      v = ((v >> 8) & 0x00ff00ff) | ((v & 0x00ff00ff) << 8);

      return (v << 16) | (v >> 16);
  }
  /// <summary>Reverses the bit order of a 64-bit value.</summary>
  public static ulong ReverseBits64(ulong value)
  {
      // Swap progressively larger groups: bits, pairs, nibbles, bytes, half-words, words.
      ulong v = value;

      v = ((v >> 1) & 0x5555555555555555) | ((v & 0x5555555555555555) << 1);
      v = ((v >> 2) & 0x3333333333333333) | ((v & 0x3333333333333333) << 2);
      v = ((v >> 4) & 0x0f0f0f0f0f0f0f0f) | ((v & 0x0f0f0f0f0f0f0f0f) << 4);
      v = ((v >> 8) & 0x00ff00ff00ff00ff) | ((v & 0x00ff00ff00ff00ff) << 8);
      v = ((v >> 16) & 0x0000ffff0000ffff) | ((v & 0x0000ffff0000ffff) << 16);

      return (v << 32) | (v >> 32);
  }
  // Public entry points. The name gives the reversal granularity (16-, 32- or 64-bit groups)
  // followed by the operand width; the 32-bit operand forms reuse the 64-bit ones and truncate.
  public static uint ReverseBytes16_32(uint value) => (uint)ReverseBytes16_64(value);
  public static uint ReverseBytes32_32(uint value) => (uint)ReverseBytes32_64(value);

  public static ulong ReverseBytes16_64(ulong value) => ReverseBytes(value, RevSize.Rev16);
  public static ulong ReverseBytes32_64(ulong value) => ReverseBytes(value, RevSize.Rev32);

  public static ulong ReverseBytes64(ulong value) => ReverseBytes(value, RevSize.Rev64);

  // Width of the groups within which byte order is reversed.
  private enum RevSize
  {
      Rev16,
      Rev32,
      Rev64
  }

  /// <summary>
  /// Reverses the byte order inside every 16-, 32- or 64-bit group of <paramref name="value"/>.
  /// </summary>
  /// <param name="value">Value to transform.</param>
  /// <param name="size">Group width within which bytes are reversed.</param>
  /// <returns>The transformed value.</returns>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="size"/> is not a defined <c>RevSize</c> member. (Previously a plain
  /// <see cref="ArgumentException"/> whose message was just the parameter name; the new type
  /// derives from it, so existing handlers still match.)
  /// </exception>
  private static ulong ReverseBytes(ulong value, RevSize size)
  {
      // Each stage swaps neighbouring groups twice as wide as the previous one;
      // stopping early yields the narrower reversals.
      value = ((value & 0xff00ff00ff00ff00) >> 8) | ((value & 0x00ff00ff00ff00ff) << 8);

      if (size == RevSize.Rev16)
      {
          return value;
      }

      value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16);

      if (size == RevSize.Rev32)
      {
          return value;
      }

      value = ((value & 0xffffffff00000000) >> 32) | ((value & 0x00000000ffffffff) << 32);

      if (size == RevSize.Rev64)
      {
          return value;
      }

      throw new ArgumentOutOfRangeException(nameof(size), size, "Unsupported byte-reversal size.");
  }
  951. #endregion
  952. #region "MultiplyHigh"
  /// <summary>
  /// Returns the upper 64 bits of the 128-bit signed product of the two operands.
  /// </summary>
  public static long SMulHi128(long left, long right)
  {
      long unsignedHigh = (long)UMulHi128((ulong)left, (ulong)right);

      // Reinterpreting a negative operand as unsigned adds 2^64 to it, which adds the other
      // operand to the high half of the product; take that back out.
      long correction = (left < 0 ? right : 0L) + (right < 0 ? left : 0L);

      return unsignedHigh - correction;
  }

  /// <summary>
  /// Returns the upper 64 bits of the 128-bit unsigned product of the two operands,
  /// computed from 32-bit partial products.
  /// </summary>
  public static ulong UMulHi128(ulong left, ulong right)
  {
      const ulong LowMask = 0xFFFFFFFF;

      ulong leftHi = left >> 32;
      ulong leftLo = left & LowMask;
      ulong rightHi = right >> 32;
      ulong rightLo = right & LowMask;

      ulong lowProduct = leftLo * rightLo;

      // First cross term plus the carry out of the low product.
      ulong crossA = leftHi * rightLo + (lowProduct >> 32);

      // Second cross term added to the low half of the first, so the sum cannot overflow.
      ulong crossB = (crossA & LowMask) + leftLo * rightHi;

      return leftHi * rightHi + (crossA >> 32) + (crossB >> 32);
  }
  979. #endregion
  980. }
  981. }