InstEmitSimdLogical.cs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. using ARMeilleure.Decoders;
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.Translation;
  4. using System;
  5. using System.Diagnostics;
  6. using static ARMeilleure.Instructions.InstEmitHelper;
  7. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  8. using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  9. namespace ARMeilleure.Instructions
  10. {
  11. static partial class InstEmit
  12. {
  /// <summary>
  /// Emits code that writes the bitwise AND of vector registers Rn and Rm to Rd.
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void And_V(ArmEmitterContext context)
  {
      if (!Optimizations.UseSse2)
      {
          // No SSE2: delegate to the generic binary-op helper with a plain AND.
          EmitVectorBinaryOpZx(context, (lhs, rhs) => context.BitwiseAnd(lhs, rhs));

          return;
      }

      OpCodeSimdReg simdOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.AddIntrinsic(Intrinsic.X86Pand, GetVec(simdOp.Rn), GetVec(simdOp.Rm));

      if (simdOp.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(simdOp.Rd), result);
  }
  /// <summary>
  /// Emits code that writes Rn AND (NOT Rm) to Rd (bit clear, register form).
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Bic_V(ArmEmitterContext context)
  {
      if (!Optimizations.UseSse2)
      {
          // No SSE2: generic helper computing first & ~second.
          EmitVectorBinaryOpZx(context, (lhs, rhs) => context.BitwiseAnd(lhs, context.BitwiseNot(rhs)));

          return;
      }

      OpCodeSimdReg simdOp = (OpCodeSimdReg)context.CurrOp;

      Operand source = GetVec(simdOp.Rn);
      Operand clearBits = GetVec(simdOp.Rm);

      // PANDN complements its first operand, so Rm goes first to obtain ~Rm & Rn.
      Operand result = context.AddIntrinsic(Intrinsic.X86Pandn, clearBits, source);

      if (simdOp.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(simdOp.Rd), result);
  }
  /// <summary>
  /// Emits code that clears in Rd the bits set in the opcode's immediate (bit clear, immediate form).
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  /// <exception cref="InvalidOperationException">
  /// Thrown on the SSE2 path when the element size is neither 16 nor 32 bits.
  /// </exception>
  public static void Bic_Vi(ArmEmitterContext context)
  {
      if (!Optimizations.UseSse2)
      {
          // No SSE2: generic immediate helper computing first & ~second.
          EmitVectorImmBinaryOp(context, (lhs, rhs) => context.BitwiseAnd(lhs, context.BitwiseNot(rhs)));

          return;
      }

      OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

      int eSize = 8 << op.Size;

      Operand d = GetVec(op.Rd);

      // The immediate is complemented up front so a plain AND performs the clear.
      Operand invertedImm;

      switch (eSize)
      {
          case 16:
              invertedImm = X86GetAllElements(context, (short)~op.Immediate);
              break;

          case 32:
              invertedImm = X86GetAllElements(context, (int)~op.Immediate);
              break;

          default:
              throw new InvalidOperationException($"Invalid element size {eSize}.");
      }

      Operand result = context.AddIntrinsic(Intrinsic.X86Pand, d, invertedImm);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Emits the bitwise insert that uses the complement of Rm as the selector
  /// (forwards to <c>EmitBifBit</c> with <c>notRm: true</c>).
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Bif_V(ArmEmitterContext context) => EmitBifBit(context, notRm: true);
  /// <summary>
  /// Emits the bitwise insert that uses Rm unmodified as the selector
  /// (forwards to <c>EmitBifBit</c> with <c>notRm: false</c>).
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Bit_V(ArmEmitterContext context) => EmitBifBit(context, notRm: false);
  /// <summary>
  /// Shared emitter for the two bitwise-insert variants. Computes
  /// <c>d ^ ((d ^ n) &amp; sel)</c>, where <c>sel</c> is Rm or its complement:
  /// bits of Rn are taken where <c>sel</c> is set, bits of Rd are kept elsewhere.
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  /// <param name="notRm">When true the selector is NOT Rm; otherwise it is Rm.</param>
  private static void EmitBifBit(ArmEmitterContext context, bool notRm)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      if (!Optimizations.UseSse2)
      {
          // Fallback: process the register as one or two 64-bit elements (size 3).
          Operand accumulated = context.VectorZero();

          int count = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;

          for (int i = 0; i < count; i++)
          {
              Operand dst = EmitVectorExtractZx(context, op.Rd, i, 3);
              Operand src = EmitVectorExtractZx(context, op.Rn, i, 3);
              Operand sel = EmitVectorExtractZx(context, op.Rm, i, 3);

              if (notRm)
              {
                  sel = context.BitwiseNot(sel);
              }

              // ((dst ^ src) & sel) ^ dst, evaluated innermost-first.
              Operand merged = context.BitwiseExclusiveOr(
                  context.BitwiseAnd(context.BitwiseExclusiveOr(dst, src), sel),
                  dst);

              accumulated = EmitVectorInsert(context, accumulated, merged, i, 3);
          }

          context.Copy(GetVec(op.Rd), accumulated);

          return;
      }

      Operand d = GetVec(op.Rd);
      Operand n = GetVec(op.Rn);
      Operand m = GetVec(op.Rm);

      // PANDN complements its first operand, which yields the ~Rm selector directly.
      Intrinsic selectOp = notRm ? Intrinsic.X86Pandn : Intrinsic.X86Pand;

      Operand result = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
      result = context.AddIntrinsic(selectOp, m, result);
      result = context.AddIntrinsic(Intrinsic.X86Pxor, d, result);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(d, result);
  }
  /// <summary>
  /// Emits a bitwise select: computes <c>((n ^ m) &amp; d) ^ m</c>, i.e. takes bits of Rn
  /// where Rd has a one and bits of Rm where Rd has a zero, and writes the result to Rd.
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Bsl_V(ArmEmitterContext context)
  {
      if (!Optimizations.UseSse2)
      {
          // No SSE2: the same formula through the generic ternary helper.
          EmitVectorTernaryOpZx(context, (sel, whenSet, whenClear) =>
          {
              Operand differing = context.BitwiseExclusiveOr(whenSet, whenClear);

              return context.BitwiseExclusiveOr(context.BitwiseAnd(sel, differing), whenClear);
          });

          return;
      }

      OpCodeSimdReg simdOp = (OpCodeSimdReg)context.CurrOp;

      Operand d = GetVec(simdOp.Rd);
      Operand n = GetVec(simdOp.Rn);
      Operand m = GetVec(simdOp.Rm);

      Operand differingBits = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
      Operand selectedBits = context.AddIntrinsic(Intrinsic.X86Pand, differingBits, d);
      Operand result = context.AddIntrinsic(Intrinsic.X86Pxor, selectedBits, m);

      if (simdOp.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(d, result);
  }
  /// <summary>
  /// Emits code that writes the bitwise exclusive OR of vector registers Rn and Rm to Rd.
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Eor_V(ArmEmitterContext context)
  {
      if (!Optimizations.UseSse2)
      {
          // No SSE2: delegate to the generic binary-op helper with a plain XOR.
          EmitVectorBinaryOpZx(context, (lhs, rhs) => context.BitwiseExclusiveOr(lhs, rhs));

          return;
      }

      OpCodeSimdReg simdOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.AddIntrinsic(Intrinsic.X86Pxor, GetVec(simdOp.Rn), GetVec(simdOp.Rm));

      if (simdOp.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(simdOp.Rd), result);
  }
  /// <summary>
  /// Emits code that writes the bitwise complement of vector register Rn to Rd.
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Not_V(ArmEmitterContext context)
  {
      if (!Optimizations.UseSse2)
      {
          // No SSE2: delegate to the generic unary-op helper with a plain NOT.
          EmitVectorUnaryOpZx(context, value => context.BitwiseNot(value));

          return;
      }

      OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

      Operand source = GetVec(simdOp.Rn);

      Operand allOnes = X86GetAllElements(context, -1L);

      // PANDN complements its first operand; ANDing with all ones leaves just ~Rn.
      Operand result = context.AddIntrinsic(Intrinsic.X86Pandn, source, allOnes);

      if (simdOp.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(simdOp.Rd), result);
  }
  /// <summary>
  /// Emits code that writes Rn OR (NOT Rm) to Rd.
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Orn_V(ArmEmitterContext context)
  {
      if (!Optimizations.UseSse2)
      {
          // No SSE2: generic helper computing first | ~second.
          EmitVectorBinaryOpZx(context, (lhs, rhs) => context.BitwiseOr(lhs, context.BitwiseNot(rhs)));

          return;
      }

      OpCodeSimdReg simdOp = (OpCodeSimdReg)context.CurrOp;

      Operand n = GetVec(simdOp.Rn);
      Operand m = GetVec(simdOp.Rm);

      Operand allOnes = X86GetAllElements(context, -1L);

      // PANDN against all ones produces ~Rm, which is then ORed with Rn.
      Operand invertedM = context.AddIntrinsic(Intrinsic.X86Pandn, m, allOnes);
      Operand result = context.AddIntrinsic(Intrinsic.X86Por, invertedM, n);

      if (simdOp.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(simdOp.Rd), result);
  }
  /// <summary>
  /// Emits code that writes the bitwise OR of vector registers Rn and Rm to Rd.
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Orr_V(ArmEmitterContext context)
  {
      if (!Optimizations.UseSse2)
      {
          // No SSE2: delegate to the generic binary-op helper with a plain OR.
          EmitVectorBinaryOpZx(context, (lhs, rhs) => context.BitwiseOr(lhs, rhs));

          return;
      }

      OpCodeSimdReg simdOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.AddIntrinsic(Intrinsic.X86Por, GetVec(simdOp.Rn), GetVec(simdOp.Rm));

      if (simdOp.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(simdOp.Rd), result);
  }
  /// <summary>
  /// Emits code that ORs the opcode's immediate into every element of Rd (OR, immediate form).
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  /// <exception cref="InvalidOperationException">
  /// Thrown on the SSE2 path when the element size is neither 16 nor 32 bits.
  /// </exception>
  public static void Orr_Vi(ArmEmitterContext context)
  {
      if (!Optimizations.UseSse2)
      {
          // No SSE2: generic immediate helper with a plain OR.
          EmitVectorImmBinaryOp(context, (lhs, rhs) => context.BitwiseOr(lhs, rhs));

          return;
      }

      OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

      int eSize = 8 << op.Size;

      Operand d = GetVec(op.Rd);

      // Replicate the immediate across all elements at the element width.
      Operand broadcastImm;

      switch (eSize)
      {
          case 16:
              broadcastImm = X86GetAllElements(context, (short)op.Immediate);
              break;

          case 32:
              broadcastImm = X86GetAllElements(context, (int)op.Immediate);
              break;

          default:
              throw new InvalidOperationException($"Invalid element size {eSize}.");
      }

      Operand result = context.AddIntrinsic(Intrinsic.X86Por, d, broadcastImm);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Emits code that reverses the bit order inside every byte of Rn and writes the result to Rd.
  /// </summary>
  /// <remarks>
  /// Without GFNI, each byte is extracted, run through <c>EmitReverseBits8Op</c> and re-inserted.
  /// With GFNI, a single GF2P8AFFINEQB with a constant bit matrix is emitted instead.
  /// </remarks>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Rbit_V(ArmEmitterContext context)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      if (!Optimizations.UseGfni)
      {
          Operand accumulated = context.VectorZero();

          // One iteration per byte: 16 for the 128-bit form, 8 for the 64-bit form.
          int byteCount = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;

          for (int i = 0; i < byteCount; i++)
          {
              Operand sourceByte = EmitVectorExtractZx(context, op.Rn, i, 0);
              Operand reversedByte = EmitReverseBits8Op(context, sourceByte);

              accumulated = EmitVectorInsert(context, accumulated, reversedByte, i, 0);
          }

          context.Copy(GetVec(op.Rd), accumulated);

          return;
      }

      // Byte k of the matrix (k = 0 is the least significant byte) has only bit k set:
      // 0x80 << 56 | 0x40 << 48 | 0x20 << 40 | 0x10 << 32 | 0x08 << 24 | 0x04 << 16 | 0x02 << 8 | 0x01.
      const long bitMatrix = unchecked((long)0x8040201008040201UL);

      Operand vBitMatrix = X86GetAllElements(context, bitMatrix);

      Operand result = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, GetVec(op.Rn), vBitMatrix, Const(0));

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Emits IR that reverses the order of the low 8 bits of <paramref name="op"/> using three
  /// mask-and-shift swap stages (adjacent bits, bit pairs, then nibbles).
  /// </summary>
  /// <param name="context">Emitter context used to emit the operations.</param>
  /// <param name="op">Value to reverse; asserted to be of type <see cref="OperandType.I64"/>.</param>
  /// <returns>Operand holding the bit-reversed value.</returns>
  private static Operand EmitReverseBits8Op(ArmEmitterContext context, Operand op)
  {
      Debug.Assert(op.Type == OperandType.I64);

      // Stage 1: swap each pair of adjacent bits.
      Operand oddBitsDown = context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaul)), Const(1));
      Operand evenBitsUp = context.ShiftLeft(context.BitwiseAnd(op, Const(0x55ul)), Const(1));
      Operand swapped = context.BitwiseOr(oddBitsDown, evenBitsUp);

      // Stage 2: swap each pair of 2-bit groups.
      Operand highPairsDown = context.ShiftRightUI(context.BitwiseAnd(swapped, Const(0xccul)), Const(2));
      Operand lowPairsUp = context.ShiftLeft(context.BitwiseAnd(swapped, Const(0x33ul)), Const(2));
      swapped = context.BitwiseOr(highPairsDown, lowPairsUp);

      // Stage 3: swap the two nibbles. The value was masked to 8 bits by the earlier stages,
      // so the right shift is applied without a mask.
      Operand highNibbleDown = context.ShiftRightUI(swapped, Const(4));
      Operand lowNibbleUp = context.ShiftLeft(context.BitwiseAnd(swapped, Const(0x0ful)), Const(4));

      return context.BitwiseOr(highNibbleDown, lowNibbleUp);
  }
  /// <summary>
  /// Emits code that swaps the two bytes inside every 16-bit container of Rn and writes the result to Rd.
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Rev16_V(ArmEmitterContext context)
  {
      if (!Optimizations.UseSsse3)
      {
          // No SSSE3: generic element-reordering helper with a 16-bit container (size 1).
          EmitRev_V(context, containerSize: 1);

          return;
      }

      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand source = GetVec(op.Rn);

      // PSHUFB control bytes: each byte names the source byte to place at that position.
      // The lower and upper 64-bit halves of the control vector are built separately.
      const long maskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0;
      const long maskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0;

      Operand shuffleMask = EmitVectorInsert(context, X86GetScalar(context, maskE0), Const(maskE1), 1, 3);

      Operand result = context.AddIntrinsic(Intrinsic.X86Pshufb, source, shuffleMask);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Emits code that reverses the order of the elements (bytes when <c>op.Size == 0</c>, halfwords
  /// otherwise) inside every 32-bit container of Rn and writes the result to Rd.
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Rev32_V(ArmEmitterContext context)
  {
      if (!Optimizations.UseSsse3)
      {
          // No SSSE3: generic element-reordering helper with a 32-bit container (size 2).
          EmitRev_V(context, containerSize: 2);

          return;
      }

      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand source = GetVec(op.Rn);

      // PSHUFB control bytes for the lower and upper 64-bit halves of the control vector.
      long lowerControl;
      long upperControl;

      if (op.Size == 0)
      {
          // Byte elements: full byte reversal within each 32-bit group.
          lowerControl = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0;
          upperControl = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0;
      }
      else
      {
          // Any other size (expected: 1): swap the two halfwords of each 32-bit group.
          lowerControl = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0;
          upperControl = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0;
      }

      Operand shuffleMask = EmitVectorInsert(context, X86GetScalar(context, lowerControl), Const(upperControl), 1, 3);

      Operand result = context.AddIntrinsic(Intrinsic.X86Pshufb, source, shuffleMask);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Emits code that reverses the order of the elements (bytes, halfwords or words, chosen by
  /// <c>op.Size</c>) inside every 64-bit container of Rn and writes the result to Rd.
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  public static void Rev64_V(ArmEmitterContext context)
  {
      if (!Optimizations.UseSsse3)
      {
          // No SSSE3: generic element-reordering helper with a 64-bit container (size 3).
          EmitRev_V(context, containerSize: 3);

          return;
      }

      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand source = GetVec(op.Rn);

      // PSHUFB control bytes for the lower and upper 64-bit halves of the control vector.
      long lowerControl;
      long upperControl;

      switch (op.Size)
      {
          case 0:
              // Byte elements: full byte reversal within each 64-bit half.
              lowerControl = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0;
              upperControl = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0;
              break;

          case 1:
              // Halfword elements: halfword order reversed, bytes inside each halfword kept.
              lowerControl = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0;
              upperControl = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0;
              break;

          default:
              // Any other size (expected: 2): swap the two words of each 64-bit half.
              lowerControl = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0;
              upperControl = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0;
              break;
      }

      Operand shuffleMask = EmitVectorInsert(context, X86GetScalar(context, lowerControl), Const(upperControl), 1, 3);

      Operand result = context.AddIntrinsic(Intrinsic.X86Pshufb, source, shuffleMask);

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          // 64-bit form: the upper 64 bits of the result are zeroed before the write-back.
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Generic fallback for the element-reversal emitters: rebuilds the vector one element at a
  /// time, reading each destination element from the mirrored position inside its container.
  /// </summary>
  /// <param name="context">Emitter context holding the opcode currently being translated.</param>
  /// <param name="containerSize">
  /// Log2 of the container width in bytes (the callers in this file pass 1, 2 or 3).
  /// </param>
  private static void EmitRev_V(ArmEmitterContext context, int containerSize)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      int elementCount = op.GetBytesCount() >> op.Size;

      // (elements per container) - 1; XORing an index with it mirrors the index inside its container.
      int mirrorMask = (1 << (containerSize - op.Size)) - 1;

      Operand accumulated = context.VectorZero();

      for (int dstIndex = 0; dstIndex < elementCount; dstIndex++)
      {
          Operand element = EmitVectorExtractZx(context, op.Rn, dstIndex ^ mirrorMask, op.Size);

          accumulated = EmitVectorInsert(context, accumulated, element, dstIndex, op.Size);
      }

      context.Copy(GetVec(op.Rd), accumulated);
  }
  420. }
  421. }