// InstEmitSimdLogical.cs
  1. using ARMeilleure.Decoders;
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.Translation;
  4. using System;
  5. using System.Diagnostics;
  6. using static ARMeilleure.Instructions.InstEmitHelper;
  7. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  8. using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  9. namespace ARMeilleure.Instructions
  10. {
  11. static partial class InstEmit
  12. {
  13. public static void And_V(ArmEmitterContext context)
  14. {
  15. if (Optimizations.UseAdvSimd)
  16. {
  17. InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV);
  18. }
  19. else if (Optimizations.UseSse2)
  20. {
  21. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  22. Operand n = GetVec(op.Rn);
  23. Operand m = GetVec(op.Rm);
  24. Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
  25. if (op.RegisterSize == RegisterSize.Simd64)
  26. {
  27. res = context.VectorZeroUpper64(res);
  28. }
  29. context.Copy(GetVec(op.Rd), res);
  30. }
  31. else
  32. {
  33. EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2));
  34. }
  35. }
  36. public static void Bic_V(ArmEmitterContext context)
  37. {
  38. if (Optimizations.UseAdvSimd)
  39. {
  40. InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV);
  41. }
  42. else if (Optimizations.UseSse2)
  43. {
  44. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  45. Operand n = GetVec(op.Rn);
  46. Operand m = GetVec(op.Rm);
  47. Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, n);
  48. if (op.RegisterSize == RegisterSize.Simd64)
  49. {
  50. res = context.VectorZeroUpper64(res);
  51. }
  52. context.Copy(GetVec(op.Rd), res);
  53. }
  54. else
  55. {
  56. EmitVectorBinaryOpZx(context, (op1, op2) =>
  57. {
  58. return context.BitwiseAnd(op1, context.BitwiseNot(op2));
  59. });
  60. }
  61. }
  62. public static void Bic_Vi(ArmEmitterContext context)
  63. {
  64. if (Optimizations.UseSse2)
  65. {
  66. OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
  67. int eSize = 8 << op.Size;
  68. Operand d = GetVec(op.Rd);
  69. Operand imm = eSize switch {
  70. 16 => X86GetAllElements(context, (short)~op.Immediate),
  71. 32 => X86GetAllElements(context, (int)~op.Immediate),
  72. _ => throw new InvalidOperationException($"Invalid element size {eSize}.")
  73. };
  74. Operand res = context.AddIntrinsic(Intrinsic.X86Pand, d, imm);
  75. if (op.RegisterSize == RegisterSize.Simd64)
  76. {
  77. res = context.VectorZeroUpper64(res);
  78. }
  79. context.Copy(GetVec(op.Rd), res);
  80. }
  81. else
  82. {
  83. EmitVectorImmBinaryOp(context, (op1, op2) =>
  84. {
  85. return context.BitwiseAnd(op1, context.BitwiseNot(op2));
  86. });
  87. }
  88. }
  89. public static void Bif_V(ArmEmitterContext context)
  90. {
  91. if (Optimizations.UseAdvSimd)
  92. {
  93. InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV);
  94. }
  95. else
  96. {
  97. EmitBifBit(context, notRm: true);
  98. }
  99. }
  100. public static void Bit_V(ArmEmitterContext context)
  101. {
  102. if (Optimizations.UseAdvSimd)
  103. {
  104. InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV);
  105. }
  106. else
  107. {
  108. EmitBifBit(context, notRm: false);
  109. }
  110. }
  111. private static void EmitBifBit(ArmEmitterContext context, bool notRm)
  112. {
  113. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  114. if (Optimizations.UseSse2)
  115. {
  116. Operand d = GetVec(op.Rd);
  117. Operand n = GetVec(op.Rn);
  118. Operand m = GetVec(op.Rm);
  119. Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
  120. if (notRm)
  121. {
  122. res = context.AddIntrinsic(Intrinsic.X86Pandn, m, res);
  123. }
  124. else
  125. {
  126. res = context.AddIntrinsic(Intrinsic.X86Pand, m, res);
  127. }
  128. res = context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
  129. if (op.RegisterSize == RegisterSize.Simd64)
  130. {
  131. res = context.VectorZeroUpper64(res);
  132. }
  133. context.Copy(d, res);
  134. }
  135. else
  136. {
  137. Operand res = context.VectorZero();
  138. int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
  139. for (int index = 0; index < elems; index++)
  140. {
  141. Operand d = EmitVectorExtractZx(context, op.Rd, index, 3);
  142. Operand n = EmitVectorExtractZx(context, op.Rn, index, 3);
  143. Operand m = EmitVectorExtractZx(context, op.Rm, index, 3);
  144. if (notRm)
  145. {
  146. m = context.BitwiseNot(m);
  147. }
  148. Operand e = context.BitwiseExclusiveOr(d, n);
  149. e = context.BitwiseAnd(e, m);
  150. e = context.BitwiseExclusiveOr(e, d);
  151. res = EmitVectorInsert(context, res, e, index, 3);
  152. }
  153. context.Copy(GetVec(op.Rd), res);
  154. }
  155. }
  156. public static void Bsl_V(ArmEmitterContext context)
  157. {
  158. if (Optimizations.UseAdvSimd)
  159. {
  160. InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV);
  161. }
  162. else if (Optimizations.UseSse2)
  163. {
  164. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  165. Operand d = GetVec(op.Rd);
  166. Operand n = GetVec(op.Rn);
  167. Operand m = GetVec(op.Rm);
  168. Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
  169. res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
  170. res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
  171. if (op.RegisterSize == RegisterSize.Simd64)
  172. {
  173. res = context.VectorZeroUpper64(res);
  174. }
  175. context.Copy(d, res);
  176. }
  177. else
  178. {
  179. EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
  180. {
  181. return context.BitwiseExclusiveOr(
  182. context.BitwiseAnd(op1,
  183. context.BitwiseExclusiveOr(op2, op3)), op3);
  184. });
  185. }
  186. }
  187. public static void Eor_V(ArmEmitterContext context)
  188. {
  189. if (Optimizations.UseAdvSimd)
  190. {
  191. InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV);
  192. }
  193. else if (Optimizations.UseSse2)
  194. {
  195. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  196. Operand n = GetVec(op.Rn);
  197. Operand m = GetVec(op.Rm);
  198. Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
  199. if (op.RegisterSize == RegisterSize.Simd64)
  200. {
  201. res = context.VectorZeroUpper64(res);
  202. }
  203. context.Copy(GetVec(op.Rd), res);
  204. }
  205. else
  206. {
  207. EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
  208. }
  209. }
  210. public static void Not_V(ArmEmitterContext context)
  211. {
  212. if (Optimizations.UseAvx512Ortho)
  213. {
  214. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  215. Operand n = GetVec(op.Rn);
  216. Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, n, Const(~0b10101010));
  217. if (op.RegisterSize == RegisterSize.Simd64)
  218. {
  219. res = context.VectorZeroUpper64(res);
  220. }
  221. context.Copy(GetVec(op.Rd), res);
  222. }
  223. else if (Optimizations.UseSse2)
  224. {
  225. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  226. Operand n = GetVec(op.Rn);
  227. Operand mask = X86GetAllElements(context, -1L);
  228. Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, n, mask);
  229. if (op.RegisterSize == RegisterSize.Simd64)
  230. {
  231. res = context.VectorZeroUpper64(res);
  232. }
  233. context.Copy(GetVec(op.Rd), res);
  234. }
  235. else
  236. {
  237. EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1));
  238. }
  239. }
  240. public static void Orn_V(ArmEmitterContext context)
  241. {
  242. if (Optimizations.UseAdvSimd)
  243. {
  244. InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV);
  245. }
  246. else if (Optimizations.UseAvx512Ortho)
  247. {
  248. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  249. Operand n = GetVec(op.Rn);
  250. Operand m = GetVec(op.Rm);
  251. Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));
  252. if (op.RegisterSize == RegisterSize.Simd64)
  253. {
  254. res = context.VectorZeroUpper64(res);
  255. }
  256. context.Copy(GetVec(op.Rd), res);
  257. }
  258. else if (Optimizations.UseSse2)
  259. {
  260. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  261. Operand n = GetVec(op.Rn);
  262. Operand m = GetVec(op.Rm);
  263. Operand mask = X86GetAllElements(context, -1L);
  264. Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask);
  265. res = context.AddIntrinsic(Intrinsic.X86Por, res, n);
  266. if (op.RegisterSize == RegisterSize.Simd64)
  267. {
  268. res = context.VectorZeroUpper64(res);
  269. }
  270. context.Copy(GetVec(op.Rd), res);
  271. }
  272. else
  273. {
  274. EmitVectorBinaryOpZx(context, (op1, op2) =>
  275. {
  276. return context.BitwiseOr(op1, context.BitwiseNot(op2));
  277. });
  278. }
  279. }
  280. public static void Orr_V(ArmEmitterContext context)
  281. {
  282. if (Optimizations.UseAdvSimd)
  283. {
  284. InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV);
  285. }
  286. else if (Optimizations.UseSse2)
  287. {
  288. OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
  289. Operand n = GetVec(op.Rn);
  290. Operand m = GetVec(op.Rm);
  291. Operand res = context.AddIntrinsic(Intrinsic.X86Por, n, m);
  292. if (op.RegisterSize == RegisterSize.Simd64)
  293. {
  294. res = context.VectorZeroUpper64(res);
  295. }
  296. context.Copy(GetVec(op.Rd), res);
  297. }
  298. else
  299. {
  300. EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2));
  301. }
  302. }
  303. public static void Orr_Vi(ArmEmitterContext context)
  304. {
  305. if (Optimizations.UseSse2)
  306. {
  307. OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
  308. int eSize = 8 << op.Size;
  309. Operand d = GetVec(op.Rd);
  310. Operand imm = eSize switch {
  311. 16 => X86GetAllElements(context, (short)op.Immediate),
  312. 32 => X86GetAllElements(context, (int)op.Immediate),
  313. _ => throw new InvalidOperationException($"Invalid element size {eSize}.")
  314. };
  315. Operand res = context.AddIntrinsic(Intrinsic.X86Por, d, imm);
  316. if (op.RegisterSize == RegisterSize.Simd64)
  317. {
  318. res = context.VectorZeroUpper64(res);
  319. }
  320. context.Copy(GetVec(op.Rd), res);
  321. }
  322. else
  323. {
  324. EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2));
  325. }
  326. }
        public static void Rbit_V(ArmEmitterContext context)
        {
            // Vector reverse bits: reverses the bit order within every byte of Vn.
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            if (Optimizations.UseGfni)
            {
                // GF2P8AFFINEQB transforms each byte by an 8x8 bit matrix; this matrix
                // reverses the bit order of each byte (same result as the scalar
                // EmitReverseBits8Op fallback below).
                const long bitMatrix =
                    (0b10000000L << 56) |
                    (0b01000000L << 48) |
                    (0b00100000L << 40) |
                    (0b00010000L << 32) |
                    (0b00001000L << 24) |
                    (0b00000100L << 16) |
                    (0b00000010L << 8) |
                    (0b00000001L << 0);

                Operand vBitMatrix = X86GetAllElements(context, bitMatrix);

                Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, GetVec(op.Rn), vBitMatrix, Const(0));

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    // The 64-bit vector form must leave the destination's upper half cleared.
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Operand res = context.VectorZero();

                // Byte elements: 8 for the 64-bit vector form, 16 for the 128-bit form.
                int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;

                for (int index = 0; index < elems; index++)
                {
                    Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);

                    Operand de = EmitReverseBits8Op(context, ne);

                    res = EmitVectorInsert(context, res, de, index, 0);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
        // Reverses the bit order of the low byte of op using three swap stages:
        // adjacent bits, then bit pairs, then nibbles. Assumes only the low 8 bits
        // of op may be set (callers pass a zero-extended byte), so the final right
        // shift needs no masking.
        private static Operand EmitReverseBits8Op(ArmEmitterContext context, Operand op)
        {
            Debug.Assert(op.Type == OperandType.I64);

            // Stage 1: swap adjacent bits.
            Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaul)), Const(1)),
                                            context.ShiftLeft   (context.BitwiseAnd(op, Const(0x55ul)), Const(1)));

            // Stage 2: swap bit pairs.
            val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccul)), Const(2)),
                                    context.ShiftLeft   (context.BitwiseAnd(val, Const(0x33ul)), Const(2)));

            // Stage 3: swap nibbles.
            return context.BitwiseOr(context.ShiftRightUI(val, Const(4)),
                                     context.ShiftLeft   (context.BitwiseAnd(val, Const(0x0ful)), Const(4)));
        }
        public static void Rev16_V(ArmEmitterContext context)
        {
            // Vector reverse bytes within each 16-bit halfword.
            if (Optimizations.UseSsse3)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                Operand n = GetVec(op.Rn);

                // Pshufb control bytes (source index per destination byte): every
                // index is XOR'ed with 1, swapping the two bytes of each halfword.
                const long maskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0;
                const long maskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0;

                Operand mask = X86GetScalar(context, maskE0);

                mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);

                Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    // The 64-bit vector form must leave the destination's upper half cleared.
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                // Scalar fallback: reverse elements within 16-bit containers.
                EmitRev_V(context, containerSize: 1);
            }
        }
        public static void Rev32_V(ArmEmitterContext context)
        {
            // Vector reverse elements within each 32-bit word (bytes for size 0,
            // halfwords for size 1).
            if (Optimizations.UseSsse3)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                Operand n = GetVec(op.Rn);

                Operand mask;

                // Pshufb control bytes (source index per destination byte).
                if (op.Size == 0)
                {
                    // Indices XOR'ed with 3: full byte reversal within each word.
                    const long maskE0 = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0;
                    const long maskE1 = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0;

                    mask = X86GetScalar(context, maskE0);

                    mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
                }
                else /* if (op.Size == 1) */
                {
                    // Indices XOR'ed with 2: swaps the two halfwords of each word.
                    const long maskE0 = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0;
                    const long maskE1 = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0;

                    mask = X86GetScalar(context, maskE0);

                    mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
                }

                Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    // The 64-bit vector form must leave the destination's upper half cleared.
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                // Scalar fallback: reverse elements within 32-bit containers.
                EmitRev_V(context, containerSize: 2);
            }
        }
        public static void Rev64_V(ArmEmitterContext context)
        {
            // Vector reverse elements within each 64-bit doubleword (bytes for size 0,
            // halfwords for size 1, words for size 2).
            if (Optimizations.UseSsse3)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                Operand n = GetVec(op.Rn);

                Operand mask;

                // Pshufb control bytes (source index per destination byte).
                if (op.Size == 0)
                {
                    // Indices XOR'ed with 7: full byte reversal within each doubleword.
                    const long maskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0;
                    const long maskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0;

                    mask = X86GetScalar(context, maskE0);

                    mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
                }
                else if (op.Size == 1)
                {
                    // Indices XOR'ed with 6: reverses the four halfwords of each doubleword.
                    const long maskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0;
                    const long maskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0;

                    mask = X86GetScalar(context, maskE0);

                    mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
                }
                else /* if (op.Size == 2) */
                {
                    // Indices XOR'ed with 4: swaps the two words of each doubleword.
                    const long maskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0;
                    const long maskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0;

                    mask = X86GetScalar(context, maskE0);

                    mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
                }

                Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    // The 64-bit vector form must leave the destination's upper half cleared.
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                // Scalar fallback: reverse elements within 64-bit containers.
                EmitRev_V(context, containerSize: 3);
            }
        }
  467. private static void EmitRev_V(ArmEmitterContext context, int containerSize)
  468. {
  469. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  470. Operand res = context.VectorZero();
  471. int elems = op.GetBytesCount() >> op.Size;
  472. int containerMask = (1 << (containerSize - op.Size)) - 1;
  473. for (int index = 0; index < elems; index++)
  474. {
  475. int revIndex = index ^ containerMask;
  476. Operand ne = EmitVectorExtractZx(context, op.Rn, revIndex, op.Size);
  477. res = EmitVectorInsert(context, res, ne, index, op.Size);
  478. }
  479. context.Copy(GetVec(op.Rd), res);
  480. }
  481. }
  482. }