// InstEmitSimdCvt32.cs — AArch32 SIMD/VFP conversion and rounding instruction emitters.
  1. using ARMeilleure.Decoders;
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.State;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using System.Diagnostics;
  7. using System.Reflection;
  8. using static ARMeilleure.Instructions.InstEmitHelper;
  9. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  10. using static ARMeilleure.Instructions.InstEmitSimdHelper32;
  11. using static ARMeilleure.IntermediateRepresentation.OperandHelper;
  12. namespace ARMeilleure.Instructions
  13. {
  14. static partial class InstEmit32
  15. {
  16. private static int FlipVdBits(int vd, bool lowBit)
  17. {
  18. if (lowBit)
  19. {
  20. // Move the low bit to the top.
  21. return ((vd & 0x1) << 4) | (vd >> 1);
  22. }
  23. else
  24. {
  25. // Move the high bit to the bottom.
  26. return ((vd & 0xf) << 1) | (vd >> 4);
  27. }
  28. }
  29. private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned)
  30. {
  31. MethodInfo info;
  32. if (op1.Type == OperandType.FP64)
  33. {
  34. info = unsigned
  35. ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32))
  36. : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
  37. }
  38. else
  39. {
  40. info = unsigned
  41. ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
  42. : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32));
  43. }
  44. return context.Call(info, op1);
  45. }
        /// <summary>
        /// VCVT (vector): converts each element between floating-point and integer.
        /// Opc bit 1 selects the float-to-integer direction, bit 0 selects unsigned.
        /// </summary>
        public static void Vcvt_V(ArmEmitterContext context)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

            bool unsigned = (op.Opc & 1) != 0;
            bool toInteger = (op.Opc & 2) != 0;
            OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64;

            if (toInteger)
            {
                if (Optimizations.UseSse41)
                {
                    // Fast path: vectorized round-towards-zero conversion with saturation.
                    EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
                }
                else
                {
                    // Slow path: per-element saturating conversion via SoftFallback.
                    EmitVectorUnaryOpF32(context, (op1) =>
                    {
                        return EmitSaturateFloatToInt(context, op1, unsigned);
                    });
                }
            }
            else
            {
                if (Optimizations.UseSse2)
                {
                    EmitVectorUnaryOpSimd32(context, (n) =>
                    {
                        if (unsigned)
                        {
                            // CVTDQ2PS is signed-only. Split each 32-bit lane into its
                            // high and low 16-bit halves, convert both halves, then
                            // recombine with the high half scaled by 65536.0f
                            // (0x47800000 is the float bit pattern of 2^16).
                            Operand mask = X86GetAllElements(context, 0x47800000);

                            Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
                            res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
                            res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);

                            Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
                            res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
                            res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);

                            return context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
                        }
                        else
                        {
                            return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
                        }
                    });
                }
                else
                {
                    // Slow path: widen each element (zero- or sign-extended) and convert.
                    if (unsigned)
                    {
                        EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
                    }
                    else
                    {
                        EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
                    }
                }
            }
        }
  102. public static void Vcvt_FD(ArmEmitterContext context)
  103. {
  104. OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
  105. int vm = op.Vm;
  106. int vd;
  107. if (op.Size == 3)
  108. {
  109. vd = FlipVdBits(op.Vd, false);
  110. // Double to single.
  111. Operand fp = ExtractScalar(context, OperandType.FP64, vm);
  112. Operand res = context.ConvertToFP(OperandType.FP32, fp);
  113. InsertScalar(context, vd, res);
  114. }
  115. else
  116. {
  117. vd = FlipVdBits(op.Vd, true);
  118. // Single to double.
  119. Operand fp = ExtractScalar(context, OperandType.FP32, vm);
  120. Operand res = context.ConvertToFP(OperandType.FP64, fp);
  121. InsertScalar(context, vd, res);
  122. }
  123. }
        /// <summary>
        /// VCVT (between floating-point and integer, scalar): converts the scalar
        /// in Vm between float/double and a 32-bit integer. Opc2 bit 2 selects the
        /// float-to-integer direction; Opc selects the rounding behavior and sign.
        /// </summary>
        public static void Vcvt_FI(ArmEmitterContext context)
        {
            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;

            bool toInteger = (op.Opc2 & 0b100) != 0;
            OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

            if (toInteger)
            {
                bool unsigned = (op.Opc2 & 1) == 0;
                // Opc == 1 forces round-towards-zero; otherwise the rounding mode
                // currently configured in the FPSCR is honored.
                bool roundWithFpscr = op.Opc != 1;

                if (!roundWithFpscr && Optimizations.UseSse41)
                {
                    EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
                }
                else
                {
                    Operand toConvert = ExtractScalar(context, floatSize, op.Vm);

                    Operand asInteger;

                    // TODO: Fast Path.
                    if (roundWithFpscr)
                    {
                        // Software conversion that reads the FPSCR rounding mode.
                        MethodInfo info;

                        if (floatSize == OperandType.FP64)
                        {
                            info = unsigned
                                ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.DoubleToUInt32))
                                : typeof(SoftFallback).GetMethod(nameof(SoftFallback.DoubleToInt32));
                        }
                        else
                        {
                            info = unsigned
                                ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.FloatToUInt32))
                                : typeof(SoftFallback).GetMethod(nameof(SoftFallback.FloatToInt32));
                        }

                        asInteger = context.Call(info, toConvert);
                    }
                    else
                    {
                        // Round towards zero.
                        asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
                    }

                    InsertScalar(context, op.Vd, asInteger);
                }
            }
            else
            {
                // Integer to floating-point: Opc == 0 means the source is unsigned.
                bool unsigned = op.Opc == 0;

                Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm);

                Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned);

                InsertScalar(context, op.Vd, asFloat);
            }
        }
  175. private static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
  176. {
  177. IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
  178. string name = nameof(Math.Round);
  179. MethodInfo info = (op.Size & 1) == 0
  180. ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
  181. : typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });
  182. return context.Call(info, n, Const((int)roundMode));
  183. }
  184. private static FPRoundingMode RMToRoundMode(int rm)
  185. {
  186. FPRoundingMode roundMode;
  187. switch (rm)
  188. {
  189. case 0b01:
  190. roundMode = FPRoundingMode.ToNearest;
  191. break;
  192. case 0b10:
  193. roundMode = FPRoundingMode.TowardsPlusInfinity;
  194. break;
  195. case 0b11:
  196. roundMode = FPRoundingMode.TowardsMinusInfinity;
  197. break;
  198. default:
  199. throw new ArgumentOutOfRangeException(nameof(rm));
  200. }
  201. return roundMode;
  202. }
  203. public static void Vcvt_R(ArmEmitterContext context)
  204. {
  205. OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
  206. OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
  207. bool unsigned = (op.Opc & 1) == 0;
  208. int rm = op.Opc2 & 3;
  209. if (Optimizations.UseSse41 && rm != 0b00)
  210. {
  211. EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
  212. }
  213. else
  214. {
  215. Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
  216. switch (rm)
  217. {
  218. case 0b00: // Away
  219. toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
  220. break;
  221. case 0b01: // Nearest
  222. toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
  223. break;
  224. case 0b10: // Towards positive infinity
  225. toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
  226. break;
  227. case 0b11: // Towards negative infinity
  228. toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
  229. break;
  230. }
  231. Operand asInteger;
  232. asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
  233. InsertScalar(context, op.Vd, asInteger);
  234. }
  235. }
  236. public static void Vrint_RM(ArmEmitterContext context)
  237. {
  238. OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
  239. OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
  240. int rm = op.Opc2 & 3;
  241. if (Optimizations.UseSse2 && rm != 0b00)
  242. {
  243. EmitScalarUnaryOpSimd32(context, (m) =>
  244. {
  245. Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
  246. FPRoundingMode roundMode = RMToRoundMode(rm);
  247. return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
  248. });
  249. }
  250. else
  251. {
  252. Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
  253. switch (rm)
  254. {
  255. case 0b00: // Away
  256. toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
  257. break;
  258. case 0b01: // Nearest
  259. toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
  260. break;
  261. case 0b10: // Towards positive infinity
  262. toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
  263. break;
  264. case 0b11: // Towards negative infinity
  265. toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
  266. break;
  267. }
  268. InsertScalar(context, op.Vd, toConvert);
  269. }
  270. }
  271. public static void Vrint_Z(ArmEmitterContext context)
  272. {
  273. IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
  274. if (Optimizations.UseSse2)
  275. {
  276. EmitScalarUnaryOpSimd32(context, (m) =>
  277. {
  278. Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
  279. return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero)));
  280. });
  281. }
  282. else
  283. {
  284. EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Truncate), op1));
  285. }
  286. }
  287. private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
  288. {
  289. Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
  290. if (signed)
  291. {
  292. return context.ConvertToFP(type, value);
  293. }
  294. else
  295. {
  296. return context.ConvertToFPUI(type, value);
  297. }
  298. }
        /// <summary>
        /// SSE4.1 fast path for scalar float/double to 32-bit integer conversion
        /// with Arm saturation semantics: NaN produces 0, and out-of-range values
        /// clamp to the integer minimum/maximum.
        /// </summary>
        private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
        {
            // A port of the similar round function in InstEmitSimdCvt.
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            bool doubleSize = (op.Size & 1) != 0;
            int shift = doubleSize ? 1 : 2;
            Operand n = GetVecA32(op.Vm >> shift);
            n = EmitSwapScalar(context, n, op.Vm, doubleSize);

            if (!doubleSize)
            {
                // Ordered self-compare is all-ones for non-NaN; ANDing with the
                // input therefore zeroes out a NaN value.
                Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
                nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));

                Operand zero = context.VectorZero();

                Operand nCmp;
                Operand nIntOrLong2 = null;

                if (!signed)
                {
                    // Unsigned: clamp negative values to zero.
                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                }

                int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648)

                Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);

                Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);

                if (!signed)
                {
                    // Unsigned: CVTSS2SI is signed, so values in [2^31, 2^32) are
                    // handled by subtracting 2^31, converting again, and combining
                    // the two partial results below.
                    nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);

                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                    nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
                }

                // All-ones when the (possibly adjusted) value is still >= the max
                // representable value; XORing with that mask saturates the result.
                nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);

                Operand dRes;
                if (signed)
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
                }
                else
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
                    dRes = context.Add(dRes, nIntOrLong);
                }

                InsertScalar(context, op.Vd, dRes);
            }
            else
            {
                // Same saturation sequence as above, using the double-precision
                // (*sd/*pd) forms of the instructions.
                Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
                nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));

                Operand zero = context.VectorZero();

                Operand nCmp;
                Operand nIntOrLong2 = null;

                if (!signed)
                {
                    // Unsigned: clamp negative values to zero.
                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                }

                long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)

                Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);

                Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);

                if (!signed)
                {
                    // Unsigned: handle [2^31, 2^32) by subtracting 2^31 and
                    // converting again; partial results are combined below.
                    nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);

                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                    nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
                }

                // Saturation mask, as in the single-precision branch.
                nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
                nLong = context.ConvertI64ToI32(nLong);

                Operand dRes;
                if (signed)
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
                }
                else
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
                    dRes = context.Add(dRes, nIntOrLong);
                }

                InsertScalar(context, op.Vd, dRes);
            }
        }
        /// <summary>
        /// SSE4.1 fast path for vector float/double to integer conversion with Arm
        /// saturation semantics: NaN lanes become 0 and out-of-range lanes clamp.
        /// </summary>
        private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

            EmitVectorUnaryOpSimd32(context, (n) =>
            {
                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    // Ordered self-compare is all-ones for non-NaN lanes; the AND
                    // zeroes out NaN lanes.
                    Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                    nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));

                    Operand zero = context.VectorZero();
                    Operand nCmp;

                    if (!signed)
                    {
                        // Unsigned: clamp negative lanes to zero.
                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                    }

                    Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)

                    Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
                    Operand nInt2 = null;

                    if (!signed)
                    {
                        // Unsigned: CVTPS2DQ is signed, so lanes in [2^31, 2^32) are
                        // handled by subtracting 2^31, converting again, and
                        // combining the two partial results below.
                        nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);

                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                        nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
                    }

                    // Lanes still >= max produce an all-ones mask; the XOR below
                    // saturates those lanes.
                    nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                    if (signed)
                    {
                        return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
                    }
                    else
                    {
                        Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
                        return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
                    }
                }
                else /* if (sizeF == 1) */
                {
                    // Same saturation sequence, double-precision (*pd) forms, with
                    // the double-to-int64 conversion done by a shared helper.
                    Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                    nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));

                    Operand zero = context.VectorZero();
                    Operand nCmp;

                    if (!signed)
                    {
                        // Unsigned: clamp negative lanes to zero.
                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                    }

                    Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)

                    Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
                    Operand nLong2 = null;

                    if (!signed)
                    {
                        // Unsigned: handle lanes in [2^63, 2^64) by subtracting 2^63
                        // and converting again; partial results are combined below.
                        nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);

                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                        nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
                    }

                    // Saturation mask for lanes still >= max.
                    nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                    if (signed)
                    {
                        return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
                    }
                    else
                    {
                        Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
                        return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
                    }
                }
            });
        }
  457. }
  458. }