AInstEmitSimdMove.cs 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562
  1. using ChocolArm64.Decoder;
  2. using ChocolArm64.State;
  3. using ChocolArm64.Translation;
  4. using System;
  5. using System.Reflection.Emit;
  6. using System.Runtime.Intrinsics.X86;
  7. using static ChocolArm64.Instruction.AInstEmitSimdHelper;
  8. namespace ChocolArm64.Instruction
  9. {
  10. static partial class AInstEmit
  11. {
  /// <summary>
  /// Emits code that replicates the value of integer register Rn into every
  /// element of vector register Rd, then clears the upper half of Rd when the
  /// operation is a 64-bit SIMD one.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Dup_Gp(AILEmitterCtx Context)
  {
      AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;

      if (!AOptimizations.UseSse2)
      {
          //Generic path: load the register and insert it once per element.
          int ElemCount = (Op.GetBitsCount() >> 3) >> Op.Size;

          for (int Elem = 0; Elem < ElemCount; Elem++)
          {
              Context.EmitLdintzr(Op.Rn);

              EmitVectorInsert(Context, Op.Rd, Elem, Op.Size);
          }
      }
      else
      {
          Context.EmitLdintzr(Op.Rn);

          //Narrow the loaded value to the element width.
          //Any other size is passed through without a conversion.
          int ElemSize = Op.Size;

          if (ElemSize == 0)
          {
              Context.Emit(OpCodes.Conv_U1);
          }
          else if (ElemSize == 1)
          {
              Context.Emit(OpCodes.Conv_U2);
          }
          else if (ElemSize == 2)
          {
              Context.Emit(OpCodes.Conv_U4);
          }

          Type[] ArgTypes = new Type[] { UIntTypesPerSizeLog2[Op.Size] };

          Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), ArgTypes));

          EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
      }

      //Both paths finish the same way for 64-bit vectors.
      if (Op.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, Op.Rd);
      }
  }
  /// <summary>
  /// Emits code that zero-extends element DstIndex of vector Rn and writes it
  /// to Rd through EmitScalarSet.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Dup_S(AILEmitterCtx Context)
  {
      AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;

      int ElemSize = Op.Size;

      EmitVectorExtractZx(Context, Op.Rn, Op.DstIndex, ElemSize);

      EmitScalarSet(Context, Op.Rd, ElemSize);
  }
  /// <summary>
  /// Emits code that copies element DstIndex of vector Rn into every element
  /// of vector Rd, then clears the upper half of Rd for 64-bit SIMD operations.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Dup_V(AILEmitterCtx Context)
  {
      AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;

      int ElemCount = (Op.GetBitsCount() >> 3) >> Op.Size;

      for (int Target = 0; Target < ElemCount; Target++)
      {
          //The source element is re-read for each destination element.
          EmitVectorExtractZx(Context, Op.Rn, Op.DstIndex, Op.Size);

          EmitVectorInsert(Context, Op.Rd, Target, Op.Size);
      }

      if (Op.RegisterSize != ARegisterSize.SIMD64)
      {
          return;
      }

      EmitVectorZeroUpper(Context, Op.Rd);
  }
  /// <summary>
  /// Emits code that builds Rd one byte at a time from Rn and Rm: bytes are
  /// read starting at offset Imm4, taken from Rn while Imm4 + index is below
  /// the vector byte count and from Rm afterwards. The result is assembled in
  /// the temporary vector and then stored to Rd.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Ext_V(AILEmitterCtx Context)
  {
      AOpCodeSimdExt Op = (AOpCodeSimdExt)Context.CurrOp;

      //Seed the temporary vector with the current contents of Rd.
      Context.EmitLdvec(Op.Rd);
      Context.EmitStvectmp();

      int ByteCount = Op.GetBitsCount() >> 3;

      int SrcByte = Op.Imm4;

      for (int DstByte = 0; DstByte < ByteCount; DstByte++)
      {
          int SrcReg;

          if (Op.Imm4 + DstByte < ByteCount)
          {
              SrcReg = Op.Rn;
          }
          else
          {
              SrcReg = Op.Rm;
          }

          //Restart from byte 0 once the read position reaches the end.
          if (SrcByte == ByteCount)
          {
              SrcByte = 0;
          }

          EmitVectorExtractZx(Context, SrcReg, SrcByte, 0);
          EmitVectorInsertTmp(Context, DstByte, 0);

          SrcByte++;
      }

      Context.EmitLdvectmp();
      Context.EmitStvec(Op.Rd);

      if (Op.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, Op.Rd);
      }
  }
  /// <summary>
  /// Emits a conditional select between element 0 of Rn and element 0 of Rm.
  /// A conditional branch on Op.Cond jumps to the path that reads Rn; the
  /// fall-through path reads Rm. The chosen value is written to Rd through
  /// EmitScalarSetF.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Fcsel_S(AILEmitterCtx Context)
  {
      AOpCodeSimdFcond Op = (AOpCodeSimdFcond)Context.CurrOp;

      AILLabel TakeRn = new AILLabel();
      AILLabel Done   = new AILLabel();

      Context.EmitCondBranch(TakeRn, Op.Cond);

      //Fall-through path: use Rm.
      EmitVectorExtractF(Context, Op.Rm, 0, Op.Size);

      Context.Emit(OpCodes.Br_S, Done);

      //Branch target: use Rn.
      Context.MarkLabel(TakeRn);

      EmitVectorExtractF(Context, Op.Rn, 0, Op.Size);

      Context.MarkLabel(Done);

      EmitScalarSetF(Context, Op.Rd, Op.Size);
  }
  /// <summary>
  /// Emits code that reads element 0 (size 3) of vector Rn, truncates it to
  /// 32 bits when the current operation requires it, and stores it to integer
  /// register Rd.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Fmov_Ftoi(AILEmitterCtx Context)
  {
      AOpCodeSimdCvt Op = (AOpCodeSimdCvt)Context.CurrOp;

      const int ElemIndex = 0;
      const int ElemSize  = 3;

      EmitVectorExtractZx(Context, Op.Rn, ElemIndex, ElemSize);

      EmitIntZeroUpperIfNeeded(Context);

      Context.EmitStintzr(Op.Rd);
  }
  /// <summary>
  /// Emits code that reads element 1 (size 3) of vector Rn, truncates it to
  /// 32 bits when the current operation requires it, and stores it to integer
  /// register Rd.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Fmov_Ftoi1(AILEmitterCtx Context)
  {
      AOpCodeSimdCvt Op = (AOpCodeSimdCvt)Context.CurrOp;

      const int ElemIndex = 1;
      const int ElemSize  = 3;

      EmitVectorExtractZx(Context, Op.Rn, ElemIndex, ElemSize);

      EmitIntZeroUpperIfNeeded(Context);

      Context.EmitStintzr(Op.Rd);
  }
  /// <summary>
  /// Emits code that loads integer register Rn, truncates it to 32 bits when
  /// the current operation requires it, and writes it to Rd through
  /// EmitScalarSet with size 3.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Fmov_Itof(AILEmitterCtx Context)
  {
      AOpCodeSimdCvt Op = (AOpCodeSimdCvt)Context.CurrOp;

      const int ElemSize = 3;

      Context.EmitLdintzr(Op.Rn);

      EmitIntZeroUpperIfNeeded(Context);

      EmitScalarSet(Context, Op.Rd, ElemSize);
  }
  /// <summary>
  /// Emits code that loads integer register Rn, truncates it to 32 bits when
  /// the current operation requires it, and inserts it into element 1
  /// (size 3) of vector Rd.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Fmov_Itof1(AILEmitterCtx Context)
  {
      AOpCodeSimdCvt Op = (AOpCodeSimdCvt)Context.CurrOp;

      const int ElemIndex = 1;
      const int ElemSize  = 3;

      Context.EmitLdintzr(Op.Rn);

      EmitIntZeroUpperIfNeeded(Context);

      EmitVectorInsert(Context, Op.Rd, ElemIndex, ElemSize);
  }
  /// <summary>
  /// Emits code that reads element 0 of vector Rn with EmitVectorExtractF and
  /// writes it to Rd with EmitScalarSetF, using the opcode's element size.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Fmov_S(AILEmitterCtx Context)
  {
      AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

      int ElemSize = Op.Size;

      EmitVectorExtractF(Context, Op.Rn, 0, ElemSize);

      EmitScalarSetF(Context, Op.Rd, ElemSize);
  }
  /// <summary>
  /// Emits code that loads the opcode's immediate as a 64-bit constant and
  /// writes it to Rd through EmitScalarSet with element size Op.Size + 2.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Fmov_Si(AILEmitterCtx Context)
  {
      AOpCodeSimdFmov Op = (AOpCodeSimdFmov)Context.CurrOp;

      int ElemSize = Op.Size + 2;

      Context.EmitLdc_I8(Op.Imm);

      EmitScalarSet(Context, Op.Rd, ElemSize);
  }
  /// <summary>
  /// Emits code that inserts the opcode's immediate into the elements of
  /// vector Rd. The element count is 4 (128-bit) or 2 (otherwise) shifted
  /// right by Op.Size, and each element has size Op.Size + 2. The upper half
  /// of Rd is cleared for 64-bit SIMD operations.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Fmov_V(AILEmitterCtx Context)
  {
      AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp;

      int BaseCount = 2;

      if (Op.RegisterSize == ARegisterSize.SIMD128)
      {
          BaseCount = 4;
      }

      int ElemCount = BaseCount >> Op.Size;

      for (int Elem = 0; Elem < ElemCount; Elem++)
      {
          Context.EmitLdc_I8(Op.Imm);

          EmitVectorInsert(Context, Op.Rd, Elem, Op.Size + 2);
      }

      if (Op.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, Op.Rd);
      }
  }
  /// <summary>
  /// Emits code that loads integer register Rn and inserts it into element
  /// DstIndex of vector Rd.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Ins_Gp(AILEmitterCtx Context)
  {
      AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;

      int SrcReg = Op.Rn;

      Context.EmitLdintzr(SrcReg);

      EmitVectorInsert(Context, Op.Rd, Op.DstIndex, Op.Size);
  }
  /// <summary>
  /// Emits code that reads element SrcIndex of vector Rn (zero-extended) and
  /// inserts it into element DstIndex of vector Rd.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Ins_V(AILEmitterCtx Context)
  {
      AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;

      int ElemSize = Op.Size;

      EmitVectorExtractZx(Context, Op.Rn, Op.SrcIndex, ElemSize);

      EmitVectorInsert(Context, Op.Rd, Op.DstIndex, ElemSize);
  }
  /// <summary>
  /// Emits a vector-immediate operation through EmitVectorImmUnaryOp with a
  /// callback that emits nothing.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Movi_V(AILEmitterCtx Context)
      => EmitVectorImmUnaryOp(Context, () => { });
  /// <summary>
  /// Emits a vector-immediate operation through EmitVectorImmUnaryOp with a
  /// callback that emits a bitwise NOT.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Mvni_V(AILEmitterCtx Context)
      => EmitVectorImmUnaryOp(Context, () => Context.Emit(OpCodes.Not));
  /// <summary>
  /// Emits code that reads element DstIndex of vector Rn with sign extension,
  /// truncates it to 32 bits when the current operation requires it, and
  /// stores it to integer register Rd.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Smov_S(AILEmitterCtx Context)
  {
      AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;

      int ElemIndex = Op.DstIndex;

      EmitVectorExtractSx(Context, Op.Rn, ElemIndex, Op.Size);

      EmitIntZeroUpperIfNeeded(Context);

      Context.EmitStintzr(Op.Rd);
  }
  /// <summary>
  /// Emits a table lookup: loads vector Rm, then Op.Size consecutive vectors
  /// starting at Rn (register numbers wrap at 32), calls the AVectorHelper
  /// Tbl helper matching that count, and stores the result to Rd.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  /// <exception cref="InvalidOperationException">
  /// Thrown when Op.Size is not between 1 and 4.
  /// </exception>
  public static void Tbl_V(AILEmitterCtx Context)
  {
      AOpCodeSimdTbl Op = (AOpCodeSimdTbl)Context.CurrOp;

      Context.EmitLdvec(Op.Rm);

      for (int Table = 0; Table < Op.Size; Table++)
      {
          Context.EmitLdvec((Op.Rn + Table) & 0x1f);
      }

      //Pick the 64-bit and 128-bit helper names for this table count.
      string Name64;
      string Name128;

      switch (Op.Size)
      {
          case 1:
              Name64  = nameof(AVectorHelper.Tbl1_V64);
              Name128 = nameof(AVectorHelper.Tbl1_V128);
              break;

          case 2:
              Name64  = nameof(AVectorHelper.Tbl2_V64);
              Name128 = nameof(AVectorHelper.Tbl2_V128);
              break;

          case 3:
              Name64  = nameof(AVectorHelper.Tbl3_V64);
              Name128 = nameof(AVectorHelper.Tbl3_V128);
              break;

          case 4:
              Name64  = nameof(AVectorHelper.Tbl4_V64);
              Name128 = nameof(AVectorHelper.Tbl4_V128);
              break;

          default:
              throw new InvalidOperationException();
      }

      AVectorHelper.EmitCall(Context, Name64, Name128);

      Context.EmitStvec(Op.Rd);
  }
  /// <summary>
  /// Emits a vector transpose using part 0 (see EmitVectorTranspose).
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Trn1_V(AILEmitterCtx Context)
      => EmitVectorTranspose(Context, Part: 0);
  /// <summary>
  /// Emits a vector transpose using part 1 (see EmitVectorTranspose).
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Trn2_V(AILEmitterCtx Context)
      => EmitVectorTranspose(Context, Part: 1);
  /// <summary>
  /// Emits code that reads element DstIndex of vector Rn with zero extension
  /// and stores it to integer register Rd.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Umov_S(AILEmitterCtx Context)
  {
      AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;

      int ElemIndex = Op.DstIndex;

      EmitVectorExtractZx(Context, Op.Rn, ElemIndex, Op.Size);

      Context.EmitStintzr(Op.Rd);
  }
  /// <summary>
  /// Emits a vector unzip using part 0 (see EmitVectorUnzip).
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Uzp1_V(AILEmitterCtx Context)
      => EmitVectorUnzip(Context, Part: 0);
  /// <summary>
  /// Emits a vector unzip using part 1 (see EmitVectorUnzip).
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Uzp2_V(AILEmitterCtx Context)
      => EmitVectorUnzip(Context, Part: 1);
  /// <summary>
  /// Emits a narrowing move: each element of Rn (size Op.Size + 1) is reduced
  /// to size Op.Size and written to Rd. For 128-bit operations the narrowed
  /// elements go to the upper half of Rd and the lower half is kept; otherwise
  /// they go to the lower half and the upper half is cleared. An SSE4.1 path
  /// is used for element sizes 0 and 1 when available.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Xtn_V(AILEmitterCtx Context)
  {
      AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

      int Elems = 8 >> Op.Size;

      //Index of the first destination element; non-zero only for 128-bit ops.
      int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;

      bool IntoUpperHalf = Part != 0;

      if (!(AOptimizations.UseSse41 && Op.Size < 2))
      {
          //Generic path: move the elements one by one through the temporary vector.
          if (IntoUpperHalf)
          {
              Context.EmitLdvec(Op.Rd);
              Context.EmitStvectmp();
          }

          for (int Elem = 0; Elem < Elems; Elem++)
          {
              EmitVectorExtractZx(Context, Op.Rn, Elem, Op.Size + 1);
              EmitVectorInsertTmp(Context, Part + Elem, Op.Size);
          }

          Context.EmitLdvectmp();
          Context.EmitStvec(Op.Rd);

          if (!IntoUpperHalf)
          {
              EmitVectorZeroUpper(Context, Op.Rd);
          }

          return;
      }

      //Pushes a zero vector whose element type matches the wide source.
      //Only sizes 0 and 1 reach this path.
      void EmitWideZero()
      {
          string ZeroName = Op.Size == 0
              ? nameof(AVectorHelper.VectorInt16Zero)
              : nameof(AVectorHelper.VectorInt32Zero);

          AVectorHelper.EmitCall(Context, ZeroName);
      }

      //Operand order for the pack call: (source, zero) when narrowing into
      //the lower half, (zero, source) when narrowing into the upper half.
      if (IntoUpperHalf)
      {
          EmitWideZero();
      }

      EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size + 1);

      //Mask that keeps only the low half of every wide element.
      Context.EmitLdc_I4(Op.Size == 0 ? 0x00ff : 0x0000ffff);

      Type WideElemType = IntTypesPerSizeLog2[Op.Size + 1];

      Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), new Type[] { WideElemType }));

      Type WideVecType = VectorIntTypesPerSizeLog2[Op.Size + 1];

      Type[] WideVecPair = new Type[] { WideVecType, WideVecType };

      Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), WideVecPair));

      if (!IntoUpperHalf)
      {
          EmitWideZero();
      }

      //Pack with unsigned saturation. The high bits were masked off above,
      //so the saturation is not expected to alter any value.
      Type PackOwner = Op.Size == 0 ? typeof(Sse2) : typeof(Sse41);

      Context.EmitCall(PackOwner.GetMethod(nameof(Sse2.PackUnsignedSaturate), WideVecPair));

      if (IntoUpperHalf)
      {
          //Clear the upper half of the destination and OR the packed
          //result into it, which keeps the destination's lower half.
          EmitVectorZeroUpper(Context, Op.Rd);

          EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size);

          Type NarrowVecType = VectorUIntTypesPerSizeLog2[Op.Size];

          Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), new Type[] { NarrowVecType, NarrowVecType }));
      }

      EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
  }
  /// <summary>
  /// Emits a vector zip using part 0 (see EmitVectorZip).
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Zip1_V(AILEmitterCtx Context)
      => EmitVectorZip(Context, Part: 0);
  /// <summary>
  /// Emits a vector zip using part 1 (see EmitVectorZip).
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  public static void Zip2_V(AILEmitterCtx Context)
      => EmitVectorZip(Context, Part: 1);
  /// <summary>
  /// Emits a Conv_U4 followed by a Conv_U8 when the current opcode's register
  /// size is Int32 or SIMD64; emits nothing otherwise.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  private static void EmitIntZeroUpperIfNeeded(AILEmitterCtx Context)
  {
      bool NeedsTruncation =
          Context.CurrOp.RegisterSize == ARegisterSize.Int32 ||
          Context.CurrOp.RegisterSize == ARegisterSize.SIMD64;

      if (!NeedsTruncation)
      {
          return;
      }

      //Narrow to 32 bits, then widen back to 64 bits without sign extension.
      Context.Emit(OpCodes.Conv_U4);
      Context.Emit(OpCodes.Conv_U8);
  }
  /// <summary>
  /// Emits a transpose of Rn and Rm into Rd. For every element pair, element
  /// (2 * i + Part) is read from Rn and from Rm, and the two values are
  /// inserted at positions 2 * i and 2 * i + 1 of the temporary vector, which
  /// is then stored to Rd. The upper half of Rd is cleared for 64-bit SIMD
  /// operations.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  /// <param name="Part">Offset (0 or 1) of the element read within each pair.</param>
  private static void EmitVectorTranspose(AILEmitterCtx Context, int Part)
  {
      AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

      int PairCount = (Op.GetBitsCount() >> 4) >> Op.Size;

      for (int Pair = 0; Pair < PairCount; Pair++)
      {
          int Even = Pair << 1;
          int Odd  = Even + 1;
          int Src  = Even + Part;

          EmitVectorExtractZx(Context, Op.Rn, Src, Op.Size);
          EmitVectorExtractZx(Context, Op.Rm, Src, Op.Size);

          //Presumably the inserts consume the extracted values in reverse
          //order, so the Rm value lands on the odd slot and Rn on the even one.
          EmitVectorInsertTmp(Context, Odd,  Op.Size);
          EmitVectorInsertTmp(Context, Even, Op.Size);
      }

      Context.EmitLdvectmp();
      Context.EmitStvec(Op.Rd);

      if (Op.RegisterSize != ARegisterSize.SIMD64)
      {
          return;
      }

      EmitVectorZeroUpper(Context, Op.Rd);
  }
  /// <summary>
  /// Emits an unzip of Rn and Rm into Rd. For every element pair, element
  /// (2 * i + Part) is read from Rn and from Rm, and the two values are
  /// inserted at positions i and Pairs + i of the temporary vector, which is
  /// then stored to Rd. The upper half of Rd is cleared for 64-bit SIMD
  /// operations.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  /// <param name="Part">Offset (0 or 1) of the element read within each pair.</param>
  private static void EmitVectorUnzip(AILEmitterCtx Context, int Part)
  {
      AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

      int PairCount = (Op.GetBitsCount() >> 4) >> Op.Size;

      for (int Pair = 0; Pair < PairCount; Pair++)
      {
          int Src = (Pair << 1) + Part;

          EmitVectorExtractZx(Context, Op.Rn, Src, Op.Size);
          EmitVectorExtractZx(Context, Op.Rm, Src, Op.Size);

          //Presumably the inserts consume the extracted values in reverse
          //order, so the Rm value goes to the second half and Rn to the first.
          EmitVectorInsertTmp(Context, PairCount + Pair, Op.Size);
          EmitVectorInsertTmp(Context, Pair,             Op.Size);
      }

      Context.EmitLdvectmp();
      Context.EmitStvec(Op.Rd);

      if (Op.RegisterSize != ARegisterSize.SIMD64)
      {
          return;
      }

      EmitVectorZeroUpper(Context, Op.Rd);
  }
  /// <summary>
  /// Emits a zip (interleave) of Rn and Rm into Rd. With SSE2 this uses
  /// UnpackLow/UnpackHigh, plus an 8-byte right shift for the second part of
  /// a 64-bit operation. Otherwise elements are interleaved one pair at a time
  /// through the temporary vector.
  /// </summary>
  /// <param name="Context">Emitter context holding the current opcode.</param>
  /// <param name="Part">0 selects the first part; any other value the second.</param>
  private static void EmitVectorZip(AILEmitterCtx Context, int Part)
  {
      AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

      if (!AOptimizations.UseSse2)
      {
          int PairCount = (Op.GetBitsCount() >> 4) >> Op.Size;

          //The second part reads from the upper half of the sources.
          int SrcBase = Part != 0 ? PairCount : 0;

          for (int Pair = 0; Pair < PairCount; Pair++)
          {
              int Even = Pair << 1;
              int Src  = SrcBase + Pair;

              EmitVectorExtractZx(Context, Op.Rn, Src, Op.Size);
              EmitVectorExtractZx(Context, Op.Rm, Src, Op.Size);

              EmitVectorInsertTmp(Context, Even + 1, Op.Size);
              EmitVectorInsertTmp(Context, Even,     Op.Size);
          }

          Context.EmitLdvectmp();
          Context.EmitStvec(Op.Rd);

          if (Op.RegisterSize == ARegisterSize.SIMD64)
          {
              EmitVectorZeroUpper(Context, Op.Rd);
          }

          return;
      }

      bool IsFirstPart = Part == 0;
      bool Is64Bit     = Op.RegisterSize == ARegisterSize.SIMD64;

      EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
      EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size);

      Type VecType = VectorUIntTypesPerSizeLog2[Op.Size];

      //64-bit operations always unpack the low halves; the second part is
      //then moved down by the shift below.
      string UnpackName = nameof(Sse2.UnpackHigh);

      if (IsFirstPart || Is64Bit)
      {
          UnpackName = nameof(Sse2.UnpackLow);
      }

      Context.EmitCall(typeof(Sse2).GetMethod(UnpackName, new Type[] { VecType, VecType }));

      if (Is64Bit && !IsFirstPart)
      {
          //Shift the whole vector right by 8 bytes.
          Context.EmitLdc_I4(8);

          Type[] ShiftArgs = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };

          Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), ShiftArgs));
      }

      EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);

      if (Is64Bit && IsFirstPart)
      {
          EmitVectorZeroUpper(Context, Op.Rd);
      }
  }
  412. }
  413. }