// InstEmitSimdMove.cs
using System;

using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;

using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Instructions
{
    static partial class InstEmit
    {
#region "Masks"
        // PSHUFB control masks used by the SSSE3 paths below (TRN/UZP/XTN and
        // friends). Each long packs eight byte-lane selectors, one per byte;
        // index [op.Size] selects the mask for 8-, 16- or 32-bit elements.
        // E0 = low 64 bits of the shuffle control, E1 = high 64 bits.

        // Gathers the even-numbered elements into the low half and keeps the
        // pairing needed by TRN1/UZP1/XTN.
        private static readonly long[] _masksE0_TrnUzpXtn = new long[]
        {
            14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
            13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
            11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
        };

        // High-half counterpart of the mask above, gathering the odd-numbered
        // elements (used for TRN2/UZP2 style selection).
        private static readonly long[] _masksE1_TrnUzp = new long[]
        {
            15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0,
            15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0,
            15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0
        };

        // Masks for the 64-bit UZP path (only sizes 0 and 1 are needed there,
        // size 2 is handled without a shuffle).
        private static readonly long[] _masksE0_Uzp = new long[]
        {
            13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
            11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0
        };

        private static readonly long[] _masksE1_Uzp = new long[]
        {
            15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
            15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0
        };
#endregion
/// <summary>
/// Emits DUP (general): broadcasts a general-purpose register value into
/// every element of the destination vector.
/// </summary>
public static void Dup_Gp(ArmEmitterContext context)
{
    OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

    Operand n = GetIntOrZR(context, op.Rn);

    if (Optimizations.UseSse2)
    {
        switch (op.Size)
        {
            // For 8/16-bit elements, replicate the value across a 32-bit
            // lane first (zero-extend then multiply by a repeating-ones
            // pattern), so a single 32-bit shuffle can finish the broadcast.
            case 0: n = context.ZeroExtend8 (n.Type, n); n = context.Multiply(n, Const(n.Type, 0x01010101)); break;
            case 1: n = context.ZeroExtend16(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x00010001)); break;
            case 2: n = context.ZeroExtend32(n.Type, n); break;
        }

        Operand res = context.VectorInsert(context.VectorZero(), n, 0);

        if (op.Size < 3)
        {
            if (op.RegisterSize == RegisterSize.Simd64)
            {
                // 64-bit vector: broadcast into the low two 32-bit lanes and
                // keep the (zero) upper lanes from the zero vector.
                res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0));
            }
            else
            {
                // 128-bit vector: broadcast lane 0 into all four 32-bit lanes.
                res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
            }
        }
        else
        {
            // 64-bit elements: duplicate the low qword into the high qword.
            res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Scalar fallback: insert the value into each element in turn.
        Operand res = context.VectorZero();

        int elems = op.GetBytesCount() >> op.Size;

        for (int index = 0; index < elems; index++)
        {
            res = EmitVectorInsert(context, res, n, index, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
  77. public static void Dup_S(ArmEmitterContext context)
  78. {
  79. OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
  80. Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
  81. context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), ne, 0, op.Size));
  82. }
/// <summary>
/// Emits DUP (element, vector): broadcasts one element of the source
/// vector into every element of the destination vector.
/// </summary>
public static void Dup_V(ArmEmitterContext context)
{
    OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

    if (Optimizations.UseSse2)
    {
        Operand res = GetVec(op.Rn);

        if (op.Size == 0)
        {
            // Bytes: shift the selected byte down to position 0, widen it
            // across a 32-bit lane with two unpacks, then splat the lane.
            if (op.DstIndex != 0)
            {
                res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex));
            }

            res = context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res);
            res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
            res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
        }
        else if (op.Size == 1)
        {
            // Halfwords: same idea, one unpack suffices before the splat.
            if (op.DstIndex != 0)
            {
                res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2));
            }

            res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
            res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
        }
        else if (op.Size == 2)
        {
            // Words: shufps immediate 0bIIIIIIII selects the same source
            // lane (DstIndex) for all four destination lanes.
            int mask = op.DstIndex * 0b01010101;

            res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(mask));
        }
        else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64)
        {
            // Doublewords, low element, full vector: copy low qword up.
            res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
        }
        else if (op.DstIndex == 1)
        {
            // Doublewords, high element: copy high qword down.
            res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res);
        }

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Scalar fallback: extract once, insert into each element.
        Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);

        Operand res = context.VectorZero();

        int elems = op.GetBytesCount() >> op.Size;

        for (int index = 0; index < elems; index++)
        {
            res = EmitVectorInsert(context, res, ne, index, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
/// <summary>
/// Emits EXT: extracts a vector from a pair of vectors, taking bytes from
/// Rn starting at Imm4 and filling the remainder from the low bytes of Rm.
/// </summary>
public static void Ext_V(ArmEmitterContext context)
{
    OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;

    if (Optimizations.UseSse2)
    {
        Operand nShifted = GetVec(op.Rn);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            // Clear the (architecturally unused) upper half before shifting
            // so no stale bytes leak into the result.
            nShifted = context.AddIntrinsic(Intrinsic.X86Movlhps, nShifted, context.VectorZero());
        }

        // Drop the first Imm4 bytes of Rn...
        nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4));

        Operand mShifted = GetVec(op.Rm);

        // ...and align Rm's low bytes so they land right after Rn's bytes.
        mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4));

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            mShifted = context.AddIntrinsic(Intrinsic.X86Movlhps, mShifted, context.VectorZero());
        }

        // OR the two halves together to form the concatenated result.
        Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted);

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Scalar fallback: walk byte positions, switching from Rn to Rm
        // once the Rn bytes run out; position wraps within the vector.
        Operand res = context.VectorZero();

        int bytes = op.GetBytesCount();

        int position = op.Imm4 & (bytes - 1);

        for (int index = 0; index < bytes; index++)
        {
            int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;

            Operand e = EmitVectorExtractZx(context, reg, position, 0);

            position = (position + 1) & (bytes - 1);

            res = EmitVectorInsert(context, res, e, index, 0);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
  174. public static void Fcsel_S(ArmEmitterContext context)
  175. {
  176. OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
  177. Operand lblTrue = Label();
  178. Operand lblEnd = Label();
  179. Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond);
  180. context.BranchIfTrue(lblTrue, isTrue);
  181. OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
  182. Operand me = context.VectorExtract(type, GetVec(op.Rm), 0);
  183. context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0));
  184. context.Branch(lblEnd);
  185. context.MarkLabel(lblTrue);
  186. Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
  187. context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
  188. context.MarkLabel(lblEnd);
  189. }
  190. public static void Fmov_Ftoi(ArmEmitterContext context)
  191. {
  192. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  193. Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2);
  194. SetIntOrZR(context, op.Rd, ne);
  195. }
  196. public static void Fmov_Ftoi1(ArmEmitterContext context)
  197. {
  198. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  199. Operand ne = EmitVectorExtractZx(context, op.Rn, 1, 3);
  200. SetIntOrZR(context, op.Rd, ne);
  201. }
  202. public static void Fmov_Itof(ArmEmitterContext context)
  203. {
  204. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  205. Operand n = GetIntOrZR(context, op.Rn);
  206. context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), n, 0, op.Size + 2));
  207. }
  208. public static void Fmov_Itof1(ArmEmitterContext context)
  209. {
  210. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  211. Operand n = GetIntOrZR(context, op.Rn);
  212. context.Copy(GetVec(op.Rd), EmitVectorInsert(context, GetVec(op.Rd), n, 1, 3));
  213. }
  214. public static void Fmov_S(ArmEmitterContext context)
  215. {
  216. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  217. OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
  218. Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
  219. context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
  220. }
  221. public static void Fmov_Si(ArmEmitterContext context)
  222. {
  223. OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp;
  224. if (op.Size == 0)
  225. {
  226. context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate));
  227. }
  228. else
  229. {
  230. context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
  231. }
  232. }
  233. public static void Fmov_Vi(ArmEmitterContext context)
  234. {
  235. OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
  236. Operand e = Const(op.Immediate);
  237. Operand res = context.VectorZero();
  238. int elems = op.RegisterSize == RegisterSize.Simd128 ? 4 : 2;
  239. for (int index = 0; index < (elems >> op.Size); index++)
  240. {
  241. res = EmitVectorInsert(context, res, e, index, op.Size + 2);
  242. }
  243. context.Copy(GetVec(op.Rd), res);
  244. }
  245. public static void Ins_Gp(ArmEmitterContext context)
  246. {
  247. OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
  248. Operand d = GetVec(op.Rd);
  249. Operand n = GetIntOrZR(context, op.Rn);
  250. context.Copy(d, EmitVectorInsert(context, d, n, op.DstIndex, op.Size));
  251. }
  252. public static void Ins_V(ArmEmitterContext context)
  253. {
  254. OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
  255. Operand d = GetVec(op.Rd);
  256. Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size);
  257. context.Copy(d, EmitVectorInsert(context, d, ne, op.DstIndex, op.Size));
  258. }
  259. public static void Movi_V(ArmEmitterContext context)
  260. {
  261. if (Optimizations.UseSse2)
  262. {
  263. EmitMoviMvni(context, not: false);
  264. }
  265. else
  266. {
  267. EmitVectorImmUnaryOp(context, (op1) => op1);
  268. }
  269. }
  270. public static void Mvni_V(ArmEmitterContext context)
  271. {
  272. if (Optimizations.UseSse2)
  273. {
  274. EmitMoviMvni(context, not: true);
  275. }
  276. else
  277. {
  278. EmitVectorImmUnaryOp(context, (op1) => context.BitwiseNot(op1));
  279. }
  280. }
  281. public static void Smov_S(ArmEmitterContext context)
  282. {
  283. OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
  284. Operand ne = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);
  285. if (op.RegisterSize == RegisterSize.Simd64)
  286. {
  287. ne = context.ZeroExtend32(OperandType.I64, ne);
  288. }
  289. SetIntOrZR(context, op.Rd, ne);
  290. }
/// <summary>
/// Emits TBL: table vector lookup. Indexes in Rm select bytes from a table
/// of 1-4 consecutive registers starting at Rn (op.Size is the register
/// count); out-of-range indexes produce zero.
/// </summary>
public static void Tbl_V(ArmEmitterContext context)
{
    OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;

    if (Optimizations.UseSsse3)
    {
        Operand n = GetVec(op.Rn);
        Operand m = GetVec(op.Rm);

        // PSHUFB zeroes a lane when the control byte has bit 7 set. Any
        // index > 0x0F is forced to have its top bit set via the signed
        // compare + OR, so out-of-range indexes yield zero as TBL requires.
        Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL);

        Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);

        mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);

        Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);

        // Remaining table registers: rebase the indexes by subtracting
        // 16 * register position, look up, and merge with OR.
        for (int index = 1; index < op.Size; index++)
        {
            // Register numbers wrap modulo 32.
            Operand ni = GetVec((op.Rn + index) & 0x1f);

            Operand indexMask = X86GetAllElements(context, 0x1010101010101010L * index);

            Operand mMinusMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, indexMask);

            Operand mMask2 = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mMinusMask, mask);

            mMask2 = context.AddIntrinsic(Intrinsic.X86Por, mMask2, mMinusMask);

            Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask2);

            res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
        }

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Software fallback: call the matching SoftFallback helper with the
        // index vector followed by the table registers.
        Operand[] args = new Operand[1 + op.Size];

        args[0] = GetVec(op.Rm);

        for (int index = 0; index < op.Size; index++)
        {
            args[1 + index] = GetVec((op.Rn + index) & 0x1f);
        }

        Delegate dlg = null;

        switch (op.Size)
        {
            case 1: dlg = op.RegisterSize == RegisterSize.Simd64
                ? (Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V64)
                : (Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V128); break;

            case 2: dlg = op.RegisterSize == RegisterSize.Simd64
                ? (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V64)
                : (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V128); break;

            case 3: dlg = op.RegisterSize == RegisterSize.Simd64
                ? (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V64)
                : (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V128); break;

            case 4: dlg = op.RegisterSize == RegisterSize.Simd64
                ? (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V64)
                : (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V128); break;
        }

        context.Copy(GetVec(op.Rd), context.Call(dlg, args));
    }
}
  345. public static void Trn1_V(ArmEmitterContext context)
  346. {
  347. EmitVectorTranspose(context, part: 0);
  348. }
  349. public static void Trn2_V(ArmEmitterContext context)
  350. {
  351. EmitVectorTranspose(context, part: 1);
  352. }
  353. public static void Umov_S(ArmEmitterContext context)
  354. {
  355. OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
  356. Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
  357. SetIntOrZR(context, op.Rd, ne);
  358. }
  359. public static void Uzp1_V(ArmEmitterContext context)
  360. {
  361. EmitVectorUnzip(context, part: 0);
  362. }
  363. public static void Uzp2_V(ArmEmitterContext context)
  364. {
  365. EmitVectorUnzip(context, part: 1);
  366. }
/// <summary>
/// Emits XTN/XTN2: narrows each element of Rn to half its width. XTN
/// writes the low half of Rd; XTN2 (128-bit form) writes the high half
/// while preserving the low half.
/// </summary>
public static void Xtn_V(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    if (Optimizations.UseSsse3)
    {
        Operand d = GetVec(op.Rd);

        // Keep Rd's low 64 bits (needed by the XTN2 form), clear the rest.
        Operand res = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());

        Operand n = GetVec(op.Rn);

        // Shuffle gathers the low halves of every source element into the
        // low 64 bits of res2.
        Operand mask = X86GetAllElements(context, _masksE0_TrnUzpXtn[op.Size]);

        Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);

        // XTN2 merges into the high half (movlhps); XTN drops the result
        // into the low half (movhlps leaves the upper half zero).
        Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
            ? Intrinsic.X86Movlhps
            : Intrinsic.X86Movhlps;

        res = context.AddIntrinsic(movInst, res, res2);

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        int elems = 8 >> op.Size;

        // XTN2 targets the upper elements and must keep Rd's current value.
        int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

        Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));

        for (int index = 0; index < elems; index++)
        {
            // Extract at double width, insert at the narrowed width.
            Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);

            res = EmitVectorInsert(context, res, ne, part + index, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
  396. public static void Zip1_V(ArmEmitterContext context)
  397. {
  398. EmitVectorZip(context, part: 0);
  399. }
  400. public static void Zip2_V(ArmEmitterContext context)
  401. {
  402. EmitVectorZip(context, part: 1);
  403. }
  404. private static void EmitMoviMvni(ArmEmitterContext context, bool not)
  405. {
  406. OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
  407. long imm = op.Immediate;
  408. switch (op.Size)
  409. {
  410. case 0: imm *= 0x01010101; break;
  411. case 1: imm *= 0x00010001; break;
  412. }
  413. if (not)
  414. {
  415. imm = ~imm;
  416. }
  417. Operand mask;
  418. if (op.Size < 3)
  419. {
  420. mask = X86GetAllElements(context, (int)imm);
  421. }
  422. else
  423. {
  424. mask = X86GetAllElements(context, imm);
  425. }
  426. if (op.RegisterSize == RegisterSize.Simd64)
  427. {
  428. mask = context.VectorZeroUpper64(mask);
  429. }
  430. context.Copy(GetVec(op.Rd), mask);
  431. }
/// <summary>
/// Shared TRN1/TRN2 implementation: forms the destination from the even
/// (part 0) or odd (part 1) element pairs of Rn and Rm.
/// </summary>
private static void EmitVectorTranspose(ArmEmitterContext context, int part)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    if (Optimizations.UseSsse3)
    {
        Operand mask = null;

        if (op.Size < 3)
        {
            // Build a 128-bit PSHUFB control from the per-size mask halves;
            // it pre-sorts elements so a single punpck finishes the job.
            long maskE0 = _masksE0_TrnUzpXtn[op.Size];
            long maskE1 = _masksE1_TrnUzp  [op.Size];

            mask = X86GetScalar(context, maskE0);

            mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
        }

        Operand n = GetVec(op.Rn);

        if (op.Size < 3)
        {
            n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
        }

        Operand m = GetVec(op.Rm);

        if (op.Size < 3)
        {
            m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
        }

        // part 0 takes the low (even) lanes, part 1 the high (odd) lanes.
        Intrinsic punpckInst = part == 0
            ? X86PunpcklInstruction[op.Size]
            : X86PunpckhInstruction[op.Size];

        Operand res = context.AddIntrinsic(punpckInst, n, m);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Scalar fallback: each output pair is (Rn[2i+part], Rm[2i+part]).
        Operand res = context.VectorZero();

        int pairs = op.GetPairsCount() >> op.Size;

        for (int index = 0; index < pairs; index++)
        {
            int pairIndex = index << 1;

            Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size);
            Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size);

            res = EmitVectorInsert(context, res, ne, pairIndex,     op.Size);
            res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
/// <summary>
/// Shared UZP1/UZP2 implementation: concatenates the even (part 0) or odd
/// (part 1) indexed elements of Rn and Rm into the destination.
/// </summary>
private static void EmitVectorUnzip(ArmEmitterContext context, int part)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    if (Optimizations.UseSsse3)
    {
        if (op.RegisterSize == RegisterSize.Simd128)
        {
            Operand mask = null;

            if (op.Size < 3)
            {
                // PSHUFB control groups even elements in one qword and odd
                // elements in the other, so a qword unpack selects the part.
                long maskE0 = _masksE0_TrnUzpXtn[op.Size];
                long maskE1 = _masksE1_TrnUzp  [op.Size];

                mask = X86GetScalar(context, maskE0);

                mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
            }

            Operand n = GetVec(op.Rn);

            if (op.Size < 3)
            {
                n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
            }

            Operand m = GetVec(op.Rm);

            if (op.Size < 3)
            {
                m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
            }

            Intrinsic punpckInst = part == 0
                ? Intrinsic.X86Punpcklqdq
                : Intrinsic.X86Punpckhqdq;

            Operand res = context.AddIntrinsic(punpckInst, n, m);

            context.Copy(GetVec(op.Rd), res);
        }
        else
        {
            // 64-bit vectors: unpack the low halves together first, then
            // (for sub-32-bit elements) shuffle even/odd elements into
            // separate qwords before selecting the requested part.
            Operand n = GetVec(op.Rn);
            Operand m = GetVec(op.Rm);

            Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];

            Operand res = context.AddIntrinsic(punpcklInst, n, m);

            if (op.Size < 2)
            {
                long maskE0 = _masksE0_Uzp[op.Size];
                long maskE1 = _masksE1_Uzp[op.Size];

                Operand mask = X86GetScalar(context, maskE0);

                mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);

                res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
            }

            Intrinsic punpckInst = part == 0
                ? Intrinsic.X86Punpcklqdq
                : Intrinsic.X86Punpckhqdq;

            // Unpack against zero also clears the upper 64 bits.
            res = context.AddIntrinsic(punpckInst, res, context.VectorZero());

            context.Copy(GetVec(op.Rd), res);
        }
    }
    else
    {
        // Scalar fallback: Rn's selected elements fill the low half of the
        // result, Rm's fill the high half.
        Operand res = context.VectorZero();

        int pairs = op.GetPairsCount() >> op.Size;

        for (int index = 0; index < pairs; index++)
        {
            int idx = index << 1;

            Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
            Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);

            res = EmitVectorInsert(context, res, ne,         index, op.Size);
            res = EmitVectorInsert(context, res, me, pairs + index, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
/// <summary>
/// Shared ZIP1/ZIP2 implementation: interleaves elements from the lower
/// (part 0) or upper (part 1) halves of Rn and Rm.
/// </summary>
private static void EmitVectorZip(ArmEmitterContext context, int part)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    if (Optimizations.UseSse2)
    {
        Operand n = GetVec(op.Rn);
        Operand m = GetVec(op.Rm);

        if (op.RegisterSize == RegisterSize.Simd128)
        {
            // PUNPCKL/H directly implements the interleave for the chosen half.
            Intrinsic punpckInst = part == 0
                ? X86PunpcklInstruction[op.Size]
                : X86PunpckhInstruction[op.Size];

            Operand res = context.AddIntrinsic(punpckInst, n, m);

            context.Copy(GetVec(op.Rd), res);
        }
        else
        {
            // 64-bit vectors: interleave the low halves, then pick the low
            // or high qword of the interleaved result (part 0 / part 1);
            // unpacking against zero also clears the upper 64 bits.
            Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m);

            Intrinsic punpckInst = part == 0
                ? Intrinsic.X86Punpcklqdq
                : Intrinsic.X86Punpckhqdq;

            res = context.AddIntrinsic(punpckInst, res, context.VectorZero());

            context.Copy(GetVec(op.Rd), res);
        }
    }
    else
    {
        // Scalar fallback: read from the lower (ZIP1) or upper (ZIP2) half
        // of each source and write the elements out interleaved.
        Operand res = context.VectorZero();

        int pairs = op.GetPairsCount() >> op.Size;

        int baseIndex = part != 0 ? pairs : 0;

        for (int index = 0; index < pairs; index++)
        {
            int pairIndex = index << 1;

            Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
            Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);

            res = EmitVectorInsert(context, res, ne, pairIndex,     op.Size);
            res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
  588. }
  589. }