InstEmitSimdMove.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. using ChocolArm64.Decoders;
  2. using ChocolArm64.State;
  3. using ChocolArm64.Translation;
  4. using System;
  5. using System.Reflection.Emit;
  6. using System.Runtime.Intrinsics;
  7. using System.Runtime.Intrinsics.X86;
  8. using static ChocolArm64.Instructions.InstEmitSimdHelper;
  9. namespace ChocolArm64.Instructions
  10. {
  11. static partial class InstEmit
  12. {
  /// <summary>
  /// Emits code that replicates the integer register <c>Rn</c> into every element of the
  /// vector register <c>Rd</c>, clearing the upper half of <c>Rd</c> for 64-bit vector operations.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdIns64</c>.</param>
  public static void Dup_Gp(ILEmitterCtx context)
  {
      var dupOp = (OpCodeSimdIns64)context.CurrOp;

      if (!Optimizations.UseSse2)
      {
          //Fallback: insert the register value into each destination element in turn.
          int byteCount = dupOp.GetBitsCount() >> 3;
          int elemCount = byteCount >> dupOp.Size;

          for (int elem = 0; elem < elemCount; elem++)
          {
              context.EmitLdintzr(dupOp.Rn);

              EmitVectorInsert(context, dupOp.Rd, elem, dupOp.Size);
          }
      }
      else
      {
          Type[] broadcastArgs = { UIntTypesPerSizeLog2[dupOp.Size] };

          context.EmitLdintzr(dupOp.Rn);

          //Narrow the loaded value to the element width; any other size is left unconverted.
          if (dupOp.Size == 0)
          {
              context.Emit(OpCodes.Conv_U1);
          }
          else if (dupOp.Size == 1)
          {
              context.Emit(OpCodes.Conv_U2);
          }
          else if (dupOp.Size == 2)
          {
              context.Emit(OpCodes.Conv_U4);
          }

          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), broadcastArgs));

          EmitStvecWithUnsignedCast(context, dupOp.Rd, dupOp.Size);
      }

      if (dupOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, dupOp.Rd);
      }
  }
  /// <summary>
  /// Emits code that extracts element <c>DstIndex</c> of <c>Rn</c> (via
  /// <c>EmitVectorExtractZx</c>) and stores it to <c>Rd</c> through <c>EmitScalarSet</c>.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdIns64</c>.</param>
  public static void Dup_S(ILEmitterCtx context)
  {
      var dupOp = (OpCodeSimdIns64)context.CurrOp;

      //Push the selected source element, then write it as the scalar result.
      EmitVectorExtractZx(context, dupOp.Rn, dupOp.DstIndex, dupOp.Size);

      EmitScalarSet(context, dupOp.Rd, dupOp.Size);
  }
  /// <summary>
  /// Emits code that replicates element <c>DstIndex</c> of vector <c>Rn</c> into every element
  /// of vector <c>Rd</c>, clearing the upper half of <c>Rd</c> for 64-bit vector operations.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdIns64</c>.</param>
  public static void Dup_V(ILEmitterCtx context)
  {
      var dupOp = (OpCodeSimdIns64)context.CurrOp;

      if (!Optimizations.UseSse2)
      {
          //Fallback: re-extract the source element and insert it at each destination index.
          int byteCount = dupOp.GetBitsCount() >> 3;
          int elemCount = byteCount >> dupOp.Size;

          for (int elem = 0; elem < elemCount; elem++)
          {
              EmitVectorExtractZx(context, dupOp.Rn, dupOp.DstIndex, dupOp.Size);

              EmitVectorInsert(context, dupOp.Rd, elem, dupOp.Size);
          }
      }
      else
      {
          Type[] broadcastArgs = { UIntTypesPerSizeLog2[dupOp.Size] };

          EmitVectorExtractZx(context, dupOp.Rn, dupOp.DstIndex, dupOp.Size);

          //Narrow the extracted value to the element width; any other size is left unconverted.
          if (dupOp.Size == 0)
          {
              context.Emit(OpCodes.Conv_U1);
          }
          else if (dupOp.Size == 1)
          {
              context.Emit(OpCodes.Conv_U2);
          }
          else if (dupOp.Size == 2)
          {
              context.Emit(OpCodes.Conv_U4);
          }

          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), broadcastArgs));

          EmitStvecWithUnsignedCast(context, dupOp.Rd, dupOp.Size);
      }

      if (dupOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, dupOp.Rd);
      }
  }
  /// <summary>
  /// Emits code that builds <c>Rd</c> from bytes of <c>Rn</c> starting at byte offset
  /// <c>Imm4</c>, followed by bytes taken from the start of <c>Rm</c>.
  /// </summary>
  /// <remarks>
  /// With SSE2 the result is formed as (Rn shifted right by Imm4 bytes) OR (Rm shifted left by
  /// the remaining byte count). Otherwise the bytes are copied one at a time through the
  /// temporary vector.
  /// </remarks>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdExt64</c>.</param>
  public static void Ext_V(ILEmitterCtx context)
  {
      var extOp = (OpCodeSimdExt64)context.CurrOp;

      bool is64Bit = extOp.RegisterSize == RegisterSize.Simd64;

      if (Optimizations.UseSse2)
      {
          Type byteVector = typeof(Vector128<byte>);

          Type[] byteShiftArgs = { byteVector, typeof(byte) };
          Type[] orArgs = { byteVector, byteVector };

          //Combines the vector on top of the stack with a zero vector through Sse.MoveLowToHigh,
          //which leaves its low half intact and zeroes its high half.
          void EmitClearUpperHalf()
          {
              VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

              context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
          }

          //First operand: Rn shifted right by Imm4 bytes.
          EmitLdvecWithUnsignedCast(context, extOp.Rn, 0);

          if (is64Bit)
          {
              EmitClearUpperHalf();
          }

          context.EmitLdc_I4(extOp.Imm4);
          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), byteShiftArgs));

          //Second operand: Rm shifted left by the number of bytes that were kept from Rn.
          EmitLdvecWithUnsignedCast(context, extOp.Rm, 0);

          context.EmitLdc_I4((is64Bit ? 8 : 16) - extOp.Imm4);
          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), byteShiftArgs));

          if (is64Bit)
          {
              EmitClearUpperHalf();
          }

          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), orArgs));

          EmitStvecWithUnsignedCast(context, extOp.Rd, 0);
      }
      else
      {
          int byteCount = extOp.GetBitsCount() >> 3;

          for (int dst = 0, src = extOp.Imm4; dst < byteCount; dst++, src++)
          {
              //Once the read position reaches the end of the first source it restarts at byte 0.
              if (src == byteCount)
              {
                  src = 0;
              }

              int srcReg = extOp.Imm4 + dst < byteCount ? extOp.Rn : extOp.Rm;

              EmitVectorExtractZx(context, srcReg, src, 0);
              EmitVectorInsertTmp(context, dst, 0);
          }

          context.EmitLdvectmp();
          context.EmitStvec(extOp.Rd);

          if (is64Bit)
          {
              EmitVectorZeroUpper(context, extOp.Rd);
          }
      }
  }
  /// <summary>
  /// Emits a conditional scalar select: a branch on the opcode's condition chooses element 0 of
  /// <c>Rn</c> (branch taken) or element 0 of <c>Rm</c> (fall-through), and the chosen value is
  /// stored to <c>Rd</c> through <c>EmitScalarSetF</c>.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdFcond64</c>.</param>
  public static void Fcsel_S(ILEmitterCtx context)
  {
      var selOp = (OpCodeSimdFcond64)context.CurrOp;

      ILLabel takenLabel = new ILLabel();
      ILLabel doneLabel = new ILLabel();

      context.EmitCondBranch(takenLabel, selOp.Cond);

      //Fall-through path: the conditional branch was not taken, so Rm supplies the value.
      EmitVectorExtractF(context, selOp.Rm, 0, selOp.Size);
      context.Emit(OpCodes.Br_S, doneLabel);

      //Branch-taken path: Rn supplies the value.
      context.MarkLabel(takenLabel);
      EmitVectorExtractF(context, selOp.Rn, 0, selOp.Size);

      //Both paths join here with the selected value on the stack.
      context.MarkLabel(doneLabel);
      EmitScalarSetF(context, selOp.Rd, selOp.Size);
  }
  /// <summary>
  /// Emits code that copies 64-bit element 0 of vector <c>Rn</c> to the integer register
  /// <c>Rd</c>, truncating to 32 bits first when <c>EmitIntZeroUpperIfNeeded</c> requires it.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdCvt64</c>.</param>
  public static void Fmov_Ftoi(ILEmitterCtx context)
  {
      var cvtOp = (OpCodeSimdCvt64)context.CurrOp;

      //Element index 0, size 3 (8-byte element).
      EmitVectorExtractZx(context, cvtOp.Rn, 0, 3);

      EmitIntZeroUpperIfNeeded(context);

      context.EmitStintzr(cvtOp.Rd);
  }
  /// <summary>
  /// Emits code that copies 64-bit element 1 (the upper half) of vector <c>Rn</c> to the integer
  /// register <c>Rd</c>, truncating to 32 bits first when <c>EmitIntZeroUpperIfNeeded</c>
  /// requires it.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdCvt64</c>.</param>
  public static void Fmov_Ftoi1(ILEmitterCtx context)
  {
      var cvtOp = (OpCodeSimdCvt64)context.CurrOp;

      //Element index 1, size 3 (8-byte element).
      EmitVectorExtractZx(context, cvtOp.Rn, 1, 3);

      EmitIntZeroUpperIfNeeded(context);

      context.EmitStintzr(cvtOp.Rd);
  }
  /// <summary>
  /// Emits code that loads the integer register <c>Rn</c>, truncates it to 32 bits when
  /// <c>EmitIntZeroUpperIfNeeded</c> requires it, and stores it to <c>Rd</c> through
  /// <c>EmitScalarSet</c> with size 3 (8-byte element).
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdCvt64</c>.</param>
  public static void Fmov_Itof(ILEmitterCtx context)
  {
      var cvtOp = (OpCodeSimdCvt64)context.CurrOp;

      context.EmitLdintzr(cvtOp.Rn);

      EmitIntZeroUpperIfNeeded(context);

      EmitScalarSet(context, cvtOp.Rd, 3);
  }
  /// <summary>
  /// Emits code that loads the integer register <c>Rn</c>, truncates it to 32 bits when
  /// <c>EmitIntZeroUpperIfNeeded</c> requires it, and inserts it into 64-bit element 1 of
  /// vector <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdCvt64</c>.</param>
  public static void Fmov_Itof1(ILEmitterCtx context)
  {
      var cvtOp = (OpCodeSimdCvt64)context.CurrOp;

      context.EmitLdintzr(cvtOp.Rn);

      EmitIntZeroUpperIfNeeded(context);

      //Element index 1, size 3 (8-byte element).
      EmitVectorInsert(context, cvtOp.Rd, 1, 3);
  }
  /// <summary>
  /// Emits a scalar floating-point register move: element 0 of <c>Rn</c> is extracted with
  /// <c>EmitVectorExtractF</c> and stored to <c>Rd</c> with <c>EmitScalarSetF</c>.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimd64</c>.</param>
  public static void Fmov_S(ILEmitterCtx context)
  {
      var movOp = (OpCodeSimd64)context.CurrOp;

      EmitVectorExtractF(context, movOp.Rn, 0, movOp.Size);

      EmitScalarSetF(context, movOp.Rd, movOp.Size);
  }
  /// <summary>
  /// Emits code that loads the opcode's immediate as a 64-bit constant and stores it to
  /// <c>Rd</c> through <c>EmitScalarSet</c>.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdFmov64</c>.</param>
  public static void Fmov_Si(ILEmitterCtx context)
  {
      var immOp = (OpCodeSimdFmov64)context.CurrOp;

      context.EmitLdc_I8(immOp.Imm);

      //The element size passed on is the opcode size offset by 2.
      EmitScalarSet(context, immOp.Rd, immOp.Size + 2);
  }
  /// <summary>
  /// Emits code that writes the opcode's immediate into every element of vector <c>Rd</c>,
  /// clearing the upper half of <c>Rd</c> for 64-bit vector operations.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdImm64</c>.</param>
  public static void Fmov_V(ILEmitterCtx context)
  {
      var immOp = (OpCodeSimdImm64)context.CurrOp;

      bool is128Bit = immOp.RegisterSize == RegisterSize.Simd128;

      //4 or 2 elements at opcode size 0, halved for each increment of the opcode size.
      int elemCount = (is128Bit ? 4 : 2) >> immOp.Size;

      for (int elem = 0; elem < elemCount; elem++)
      {
          context.EmitLdc_I8(immOp.Imm);

          EmitVectorInsert(context, immOp.Rd, elem, immOp.Size + 2);
      }

      if (immOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, immOp.Rd);
      }
  }
  /// <summary>
  /// Emits code that inserts the integer register <c>Rn</c> into element <c>DstIndex</c> of
  /// vector <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdIns64</c>.</param>
  public static void Ins_Gp(ILEmitterCtx context)
  {
      var insOp = (OpCodeSimdIns64)context.CurrOp;

      context.EmitLdintzr(insOp.Rn);

      EmitVectorInsert(context, insOp.Rd, insOp.DstIndex, insOp.Size);
  }
  /// <summary>
  /// Emits code that copies element <c>SrcIndex</c> of vector <c>Rn</c> into element
  /// <c>DstIndex</c> of vector <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdIns64</c>.</param>
  public static void Ins_V(ILEmitterCtx context)
  {
      var insOp = (OpCodeSimdIns64)context.CurrOp;

      EmitVectorExtractZx(context, insOp.Rn, insOp.SrcIndex, insOp.Size);

      EmitVectorInsert(context, insOp.Rd, insOp.DstIndex, insOp.Size);
  }
  /// <summary>
  /// Emits a vector move-immediate by delegating to <c>EmitVectorImmUnaryOp</c> with an
  /// operation that emits nothing.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Movi_V(ILEmitterCtx context)
      => EmitVectorImmUnaryOp(context, () => { });
  /// <summary>
  /// Emits a vector move-inverted-immediate by delegating to <c>EmitVectorImmUnaryOp</c> with an
  /// operation that emits a bitwise NOT.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Mvni_V(ILEmitterCtx context)
      => EmitVectorImmUnaryOp(context, () => context.Emit(OpCodes.Not));
  /// <summary>
  /// Emits code that extracts element <c>DstIndex</c> of vector <c>Rn</c> with
  /// <c>EmitVectorExtractSx</c> and stores it to the integer register <c>Rd</c>, truncating to
  /// 32 bits first when <c>EmitIntZeroUpperIfNeeded</c> requires it.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdIns64</c>.</param>
  public static void Smov_S(ILEmitterCtx context)
  {
      var movOp = (OpCodeSimdIns64)context.CurrOp;

      EmitVectorExtractSx(context, movOp.Rn, movOp.DstIndex, movOp.Size);

      EmitIntZeroUpperIfNeeded(context);

      context.EmitStintzr(movOp.Rd);
  }
  /// <summary>
  /// Emits a table lookup: pushes <c>Rm</c>, then <c>Size</c> consecutive table registers
  /// starting at <c>Rn</c>, calls the matching <c>VectorHelper.TblN</c> helper and stores the
  /// result to <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdTbl64</c>.</param>
  /// <exception cref="InvalidOperationException">The opcode's <c>Size</c> is not in the range 1 to 4.</exception>
  public static void Tbl_V(ILEmitterCtx context)
  {
      var tblOp = (OpCodeSimdTbl64)context.CurrOp;

      context.EmitLdvec(tblOp.Rm);

      //Table register numbers wrap around after 31 (mask 0x1f).
      for (int table = 0; table < tblOp.Size; table++)
      {
          context.EmitLdvec((tblOp.Rn + table) & 0x1f);
      }

      string helper64;
      string helper128;

      switch (tblOp.Size)
      {
          case 1:
              helper64  = nameof(VectorHelper.Tbl1_V64);
              helper128 = nameof(VectorHelper.Tbl1_V128);
              break;

          case 2:
              helper64  = nameof(VectorHelper.Tbl2_V64);
              helper128 = nameof(VectorHelper.Tbl2_V128);
              break;

          case 3:
              helper64  = nameof(VectorHelper.Tbl3_V64);
              helper128 = nameof(VectorHelper.Tbl3_V128);
              break;

          case 4:
              helper64  = nameof(VectorHelper.Tbl4_V64);
              helper128 = nameof(VectorHelper.Tbl4_V128);
              break;

          default:
              throw new InvalidOperationException();
      }

      VectorHelper.EmitCall(context, helper64, helper128);

      context.EmitStvec(tblOp.Rd);
  }
  /// <summary>
  /// Emits a vector transpose that reads the even-indexed source elements
  /// (<c>EmitVectorTranspose</c> with part 0).
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Trn1_V(ILEmitterCtx context)
      => EmitVectorTranspose(context, part: 0);
  /// <summary>
  /// Emits a vector transpose that reads the odd-indexed source elements
  /// (<c>EmitVectorTranspose</c> with part 1).
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Trn2_V(ILEmitterCtx context)
      => EmitVectorTranspose(context, part: 1);
  /// <summary>
  /// Emits code that extracts element <c>DstIndex</c> of vector <c>Rn</c> with
  /// <c>EmitVectorExtractZx</c> and stores it to the integer register <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdIns64</c>.</param>
  public static void Umov_S(ILEmitterCtx context)
  {
      var movOp = (OpCodeSimdIns64)context.CurrOp;

      EmitVectorExtractZx(context, movOp.Rn, movOp.DstIndex, movOp.Size);

      context.EmitStintzr(movOp.Rd);
  }
  /// <summary>
  /// Emits a vector unzip that keeps the even-indexed source elements
  /// (<c>EmitVectorUnzip</c> with part 0).
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Uzp1_V(ILEmitterCtx context)
      => EmitVectorUnzip(context, part: 0);
  /// <summary>
  /// Emits a vector unzip that keeps the odd-indexed source elements
  /// (<c>EmitVectorUnzip</c> with part 1).
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Uzp2_V(ILEmitterCtx context)
      => EmitVectorUnzip(context, part: 1);
  /// <summary>
  /// Emits a narrowing move: each element of <c>Rn</c> (read at element size <c>Size + 1</c>)
  /// is written to <c>Rd</c> at element size <c>Size</c>.
  /// </summary>
  /// <remarks>
  /// For a 64-bit operation the narrowed elements fill the lower half of <c>Rd</c> and the
  /// upper half is cleared. For a 128-bit operation they fill the upper half and the lower
  /// half of <c>Rd</c> is kept.
  /// </remarks>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimd64</c>.</param>
  public static void Xtn_V(ILEmitterCtx context)
  {
      var narrowOp = (OpCodeSimd64)context.CurrOp;

      int elemCount = 8 >> narrowOp.Size;

      //Index of the first destination element: 0 for the lower half, elemCount for the upper half.
      int dstBase = narrowOp.RegisterSize == RegisterSize.Simd128 ? elemCount : 0;

      bool toUpperHalf = dstBase != 0;

      if (Optimizations.UseSse41 && narrowOp.Size < 2)
      {
          //Pushes an all-zero vector with the same element type as the wide source.
          void EmitWideZero()
          {
              if (narrowOp.Size == 0)
              {
                  VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt16Zero));
              }
              else if (narrowOp.Size == 1)
              {
                  VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt32Zero));
              }
          }

          //The pack takes two operands: the first fills the lower half of the result and the
          //second the upper half. XTN packs (source, 0), XTN2 packs (0, source).
          if (toUpperHalf)
          {
              EmitWideZero();
          }

          EmitLdvecWithSignedCast(context, narrowOp.Rn, narrowOp.Size + 1);

          //Build a mask that keeps only the low half of every wide element.
          if (narrowOp.Size == 0)
          {
              context.EmitLdc_I4(0x00ff);
          }
          else if (narrowOp.Size == 1)
          {
              context.EmitLdc_I4(0x0000ffff);
          }

          Type wideElemType = IntTypesPerSizeLog2[narrowOp.Size + 1];

          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), new Type[] { wideElemType }));

          Type wideVecType = VectorIntTypesPerSizeLog2[narrowOp.Size + 1];

          Type[] wideBinaryArgs = { wideVecType, wideVecType };

          context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), wideBinaryArgs));

          if (!toUpperHalf)
          {
              EmitWideZero();
          }

          //Pack with unsigned saturation; nothing actually saturates because the
          //upper bits of every element were masked off above.
          Type packHost = narrowOp.Size == 0 ? typeof(Sse2) : typeof(Sse41);

          context.EmitCall(packHost.GetMethod(nameof(Sse2.PackUnsignedSaturate), wideBinaryArgs));

          if (toUpperHalf)
          {
              //XTN2 keeps the lower half of the destination: clear Rd's upper half,
              //then OR it with the packed result (whose lower half is zero).
              EmitVectorZeroUpper(context, narrowOp.Rd);

              EmitLdvecWithUnsignedCast(context, narrowOp.Rd, narrowOp.Size);

              Type narrowVecType = VectorUIntTypesPerSizeLog2[narrowOp.Size];

              context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), new Type[] { narrowVecType, narrowVecType }));
          }

          EmitStvecWithUnsignedCast(context, narrowOp.Rd, narrowOp.Size);
      }
      else
      {
          if (toUpperHalf)
          {
              //Seed the temporary with Rd so that its lower half survives.
              context.EmitLdvec(narrowOp.Rd);
              context.EmitStvectmp();
          }

          for (int elem = 0; elem < elemCount; elem++)
          {
              EmitVectorExtractZx(context, narrowOp.Rn, elem, narrowOp.Size + 1);

              EmitVectorInsertTmp(context, dstBase + elem, narrowOp.Size);
          }

          context.EmitLdvectmp();
          context.EmitStvec(narrowOp.Rd);

          if (!toUpperHalf)
          {
              EmitVectorZeroUpper(context, narrowOp.Rd);
          }
      }
  }
  /// <summary>
  /// Emits a vector zip of the lower halves of the two sources
  /// (<c>EmitVectorZip</c> with part 0).
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Zip1_V(ILEmitterCtx context)
      => EmitVectorZip(context, part: 0);
  /// <summary>
  /// Emits a vector zip of the upper halves of the two sources
  /// (<c>EmitVectorZip</c> with part 1).
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Zip2_V(ILEmitterCtx context)
      => EmitVectorZip(context, part: 1);
  /// <summary>
  /// When the current opcode's register size is <c>Int32</c> or <c>Simd64</c>, emits a
  /// conversion that truncates the value on top of the stack to 32 bits and zero-extends it
  /// back to 64 bits. Emits nothing for any other register size.
  /// </summary>
  /// <param name="context">Emitter context whose current opcode selects the register size.</param>
  private static void EmitIntZeroUpperIfNeeded(ILEmitterCtx context)
  {
      bool needsTruncation =
          context.CurrOp.RegisterSize == RegisterSize.Int32 ||
          context.CurrOp.RegisterSize == RegisterSize.Simd64;

      if (!needsTruncation)
      {
          return;
      }

      context.Emit(OpCodes.Conv_U4);
      context.Emit(OpCodes.Conv_U8);
  }
  /// <summary>
  /// Emits a vector transpose: for every element pair, element <c>2i + part</c> of <c>Rn</c>
  /// goes to result element <c>2i</c> and element <c>2i + part</c> of <c>Rm</c> goes to result
  /// element <c>2i + 1</c>. The result is assembled in the temporary vector and stored to <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdReg64</c>.</param>
  /// <param name="part">0 to read the even-indexed source elements, 1 to read the odd-indexed ones.</param>
  private static void EmitVectorTranspose(ILEmitterCtx context, int part)
  {
      var regOp = (OpCodeSimdReg64)context.CurrOp;

      int halfWords = regOp.GetBitsCount() >> 4;
      int pairCount = halfWords >> regOp.Size;

      for (int pair = 0; pair < pairCount; pair++)
      {
          int evenElem = pair << 1;
          int oddElem = evenElem + 1;

          EmitVectorExtractZx(context, regOp.Rn, evenElem + part, regOp.Size);
          EmitVectorExtractZx(context, regOp.Rm, evenElem + part, regOp.Size);

          //The Rm value was pushed last, so it is consumed first (odd slot), then the Rn value.
          EmitVectorInsertTmp(context, oddElem, regOp.Size);
          EmitVectorInsertTmp(context, evenElem, regOp.Size);
      }

      context.EmitLdvectmp();
      context.EmitStvec(regOp.Rd);

      if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, regOp.Rd);
      }
  }
  /// <summary>
  /// Emits a vector unzip: element <c>2i + part</c> of <c>Rn</c> goes to result element
  /// <c>i</c>, and element <c>2i + part</c> of <c>Rm</c> goes to result element
  /// <c>pairs + i</c>. The result is assembled in the temporary vector and stored to <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdReg64</c>.</param>
  /// <param name="part">0 to keep the even-indexed source elements, 1 to keep the odd-indexed ones.</param>
  private static void EmitVectorUnzip(ILEmitterCtx context, int part)
  {
      var regOp = (OpCodeSimdReg64)context.CurrOp;

      int halfWords = regOp.GetBitsCount() >> 4;
      int pairCount = halfWords >> regOp.Size;

      for (int pair = 0; pair < pairCount; pair++)
      {
          int srcElem = (pair << 1) + part;

          EmitVectorExtractZx(context, regOp.Rn, srcElem, regOp.Size);
          EmitVectorExtractZx(context, regOp.Rm, srcElem, regOp.Size);

          //The Rm value was pushed last, so it is consumed first (upper half), then the Rn value.
          EmitVectorInsertTmp(context, pairCount + pair, regOp.Size);
          EmitVectorInsertTmp(context, pair, regOp.Size);
      }

      context.EmitLdvectmp();
      context.EmitStvec(regOp.Rd);

      if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, regOp.Rd);
      }
  }
  /// <summary>
  /// Emits a vector zip: interleaves elements of <c>Rn</c> and <c>Rm</c>, taken from their
  /// lower halves (<paramref name="part"/> 0) or upper halves (<paramref name="part"/> non-zero),
  /// and stores the result to <c>Rd</c>.
  /// </summary>
  /// <remarks>
  /// With SSE2, a 64-bit operation always uses <c>UnpackLow</c>: both result halves come from
  /// the lower 8 bytes of the sources, so for the upper part the interleaved vector is shifted
  /// right by 8 bytes. That shift also clears the upper half, so an explicit zeroing is only
  /// needed for part 0.
  /// </remarks>
  /// <param name="context">Emitter context; its current opcode must be an <c>OpCodeSimdReg64</c>.</param>
  /// <param name="part">0 to zip the lower source halves, non-zero to zip the upper source halves.</param>
  private static void EmitVectorZip(ILEmitterCtx context, int part)
  {
      OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

      bool is64Bit = op.RegisterSize == RegisterSize.Simd64;
      bool upperPart = part != 0;

      if (Optimizations.UseSse2)
      {
          EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
          EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);

          Type vectorType = VectorUIntTypesPerSizeLog2[op.Size];

          Type[] unpackTypes = new Type[] { vectorType, vectorType };

          //UnpackHigh is only needed for the upper part of a full 128-bit vector.
          string unpackName = !upperPart || is64Bit
              ? nameof(Sse2.UnpackLow)
              : nameof(Sse2.UnpackHigh);

          context.EmitCall(typeof(Sse2).GetMethod(unpackName, unpackTypes));

          if (is64Bit && upperPart)
          {
              //Move the second 8 bytes of the interleaved data down into the lower half.
              context.EmitLdc_I4(8);

              Type[] shiftTypes = new Type[] { vectorType, typeof(byte) };

              context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), shiftTypes));
          }

          EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

          if (is64Bit && !upperPart)
          {
              EmitVectorZeroUpper(context, op.Rd);
          }
      }
      else
      {
          int words = op.GetBitsCount() >> 4;
          int pairs = words >> op.Size;

          //First source element to read: 0 for the lower halves, 'pairs' for the upper halves.
          int srcBase = upperPart ? pairs : 0;

          for (int index = 0; index < pairs; index++)
          {
              int dstIndex = index << 1;

              EmitVectorExtractZx(context, op.Rn, srcBase + index, op.Size);
              EmitVectorExtractZx(context, op.Rm, srcBase + index, op.Size);

              //The Rm value was pushed last, so it is consumed first (odd slot), then the Rn value.
              EmitVectorInsertTmp(context, dstIndex + 1, op.Size);
              EmitVectorInsertTmp(context, dstIndex, op.Size);
          }

          context.EmitLdvectmp();
          context.EmitStvec(op.Rd);

          if (is64Bit)
          {
              EmitVectorZeroUpper(context, op.Rd);
          }
      }
  }
  449. }
  450. }