// InstEmitSimdCvt32.cs
  1. using ARMeilleure.Decoders;
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.State;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using System.Diagnostics;
  7. using System.Reflection;
  8. using static ARMeilleure.Instructions.InstEmitHelper;
  9. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  10. using static ARMeilleure.Instructions.InstEmitSimdHelper32;
  11. using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  12. namespace ARMeilleure.Instructions
  13. {
  14. static partial class InstEmit32
  15. {
  16. private static int FlipVdBits(int vd, bool lowBit)
  17. {
  18. if (lowBit)
  19. {
  20. // Move the low bit to the top.
  21. return ((vd & 0x1) << 4) | (vd >> 1);
  22. }
  23. else
  24. {
  25. // Move the high bit to the bottom.
  26. return ((vd & 0xf) << 1) | (vd >> 4);
  27. }
  28. }
  29. private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned)
  30. {
  31. MethodInfo info;
  32. if (op1.Type == OperandType.FP64)
  33. {
  34. info = unsigned
  35. ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32))
  36. : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
  37. }
  38. else
  39. {
  40. info = unsigned
  41. ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
  42. : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32));
  43. }
  44. return context.Call(info, op1);
  45. }
  46. public static void Vcvt_V(ArmEmitterContext context)
  47. {
  48. OpCode32Simd op = (OpCode32Simd)context.CurrOp;
  49. bool unsigned = (op.Opc & 1) != 0;
  50. bool toInteger = (op.Opc & 2) != 0;
  51. OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64;
  52. if (toInteger)
  53. {
  54. if (Optimizations.UseAdvSimd)
  55. {
  56. InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV);
  57. }
  58. else if (Optimizations.UseSse41)
  59. {
  60. EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
  61. }
  62. else
  63. {
  64. EmitVectorUnaryOpF32(context, (op1) =>
  65. {
  66. return EmitSaturateFloatToInt(context, op1, unsigned);
  67. });
  68. }
  69. }
  70. else
  71. {
  72. if (Optimizations.UseSse2)
  73. {
  74. EmitVectorUnaryOpSimd32(context, (n) =>
  75. {
  76. if (unsigned)
  77. {
  78. Operand mask = X86GetAllElements(context, 0x47800000);
  79. Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
  80. res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
  81. res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
  82. Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
  83. res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
  84. res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
  85. return context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
  86. }
  87. else
  88. {
  89. return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
  90. }
  91. });
  92. }
  93. else
  94. {
  95. if (unsigned)
  96. {
  97. EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
  98. }
  99. else
  100. {
  101. EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
  102. }
  103. }
  104. }
  105. }
  106. public static void Vcvt_FD(ArmEmitterContext context)
  107. {
  108. OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
  109. int vm = op.Vm;
  110. int vd;
  111. if (op.Size == 3)
  112. {
  113. vd = FlipVdBits(op.Vd, false);
  114. // Double to single.
  115. Operand fp = ExtractScalar(context, OperandType.FP64, vm);
  116. Operand res = context.ConvertToFP(OperandType.FP32, fp);
  117. InsertScalar(context, vd, res);
  118. }
  119. else
  120. {
  121. vd = FlipVdBits(op.Vd, true);
  122. // Single to double.
  123. Operand fp = ExtractScalar(context, OperandType.FP32, vm);
  124. Operand res = context.ConvertToFP(OperandType.FP64, fp);
  125. InsertScalar(context, vd, res);
  126. }
  127. }
  128. // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point).
  129. public static void Vcvt_FI(ArmEmitterContext context)
  130. {
  131. OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
  132. bool toInteger = (op.Opc2 & 0b100) != 0;
  133. OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
  134. if (toInteger)
  135. {
  136. bool unsigned = (op.Opc2 & 1) == 0;
  137. bool roundWithFpscr = op.Opc != 1;
  138. if (!roundWithFpscr && Optimizations.UseAdvSimd)
  139. {
  140. bool doubleSize = floatSize == OperandType.FP64;
  141. if (doubleSize)
  142. {
  143. Operand m = GetVecA32(op.Vm >> 1);
  144. Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, doubleSize);
  145. Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble;
  146. Operand asInteger = context.AddIntrinsicInt(inst, toConvert);
  147. InsertScalar(context, op.Vd, asInteger);
  148. }
  149. else
  150. {
  151. InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS);
  152. }
  153. }
  154. else if (!roundWithFpscr && Optimizations.UseSse41)
  155. {
  156. EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
  157. }
  158. else
  159. {
  160. Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
  161. // TODO: Fast Path.
  162. if (roundWithFpscr)
  163. {
  164. toConvert = EmitRoundByRMode(context, toConvert);
  165. }
  166. // Round towards zero.
  167. Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
  168. InsertScalar(context, op.Vd, asInteger);
  169. }
  170. }
  171. else
  172. {
  173. bool unsigned = op.Opc == 0;
  174. Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm);
  175. Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned);
  176. InsertScalar(context, op.Vd, asFloat);
  177. }
  178. }
  179. private static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
  180. {
  181. IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
  182. string name = nameof(Math.Round);
  183. MethodInfo info = (op.Size & 1) == 0
  184. ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
  185. : typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });
  186. return context.Call(info, n, Const((int)roundMode));
  187. }
  188. private static FPRoundingMode RMToRoundMode(int rm)
  189. {
  190. FPRoundingMode roundMode;
  191. switch (rm)
  192. {
  193. case 0b00:
  194. roundMode = FPRoundingMode.ToNearestAway;
  195. break;
  196. case 0b01:
  197. roundMode = FPRoundingMode.ToNearest;
  198. break;
  199. case 0b10:
  200. roundMode = FPRoundingMode.TowardsPlusInfinity;
  201. break;
  202. case 0b11:
  203. roundMode = FPRoundingMode.TowardsMinusInfinity;
  204. break;
  205. default:
  206. throw new ArgumentOutOfRangeException(nameof(rm));
  207. }
  208. return roundMode;
  209. }
  210. // VCVTA/M/N/P (floating-point).
  211. public static void Vcvt_RM(ArmEmitterContext context)
  212. {
  213. OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1).
  214. OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
  215. bool unsigned = op.Opc == 0;
  216. int rm = op.Opc2 & 3;
  217. Intrinsic inst;
  218. if (Optimizations.UseAdvSimd)
  219. {
  220. if (unsigned)
  221. {
  222. inst = rm switch {
  223. 0b00 => Intrinsic.Arm64FcvtauS,
  224. 0b01 => Intrinsic.Arm64FcvtnuS,
  225. 0b10 => Intrinsic.Arm64FcvtpuS,
  226. 0b11 => Intrinsic.Arm64FcvtmuS,
  227. _ => throw new ArgumentOutOfRangeException(nameof(rm))
  228. };
  229. }
  230. else
  231. {
  232. inst = rm switch {
  233. 0b00 => Intrinsic.Arm64FcvtasS,
  234. 0b01 => Intrinsic.Arm64FcvtnsS,
  235. 0b10 => Intrinsic.Arm64FcvtpsS,
  236. 0b11 => Intrinsic.Arm64FcvtmsS,
  237. _ => throw new ArgumentOutOfRangeException(nameof(rm))
  238. };
  239. }
  240. InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
  241. }
  242. else if (Optimizations.UseSse41)
  243. {
  244. EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
  245. }
  246. else
  247. {
  248. Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
  249. switch (rm)
  250. {
  251. case 0b00: // Away
  252. toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
  253. break;
  254. case 0b01: // Nearest
  255. toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
  256. break;
  257. case 0b10: // Towards positive infinity
  258. toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
  259. break;
  260. case 0b11: // Towards negative infinity
  261. toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
  262. break;
  263. }
  264. Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
  265. InsertScalar(context, op.Vd, asInteger);
  266. }
  267. }
  268. public static void Vcvt_TB(ArmEmitterContext context)
  269. {
  270. OpCode32SimdCvtTB op = (OpCode32SimdCvtTB)context.CurrOp;
  271. if (Optimizations.UseF16c)
  272. {
  273. Debug.Assert(!Optimizations.ForceLegacySse);
  274. if (op.Op)
  275. {
  276. Operand res = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);
  277. if (op.Size == 1)
  278. {
  279. res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), res);
  280. }
  281. res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
  282. res = context.VectorExtract16(res, 0);
  283. InsertScalar16(context, op.Vd, op.T, res);
  284. }
  285. else
  286. {
  287. Operand res = context.VectorCreateScalar(ExtractScalar16(context, op.Vm, op.T));
  288. res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
  289. if (op.Size == 1)
  290. {
  291. res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
  292. }
  293. res = context.VectorExtract(op.Size == 1 ? OperandType.I64 : OperandType.I32, res, 0);
  294. InsertScalar(context, op.Vd, res);
  295. }
  296. }
  297. else
  298. {
  299. if (op.Op)
  300. {
  301. // Convert to half.
  302. Operand src = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);
  303. MethodInfo method = op.Size == 1
  304. ? typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert))
  305. : typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert));
  306. context.ExitArmFpMode();
  307. context.StoreToContext();
  308. Operand res = context.Call(method, src);
  309. context.LoadFromContext();
  310. context.EnterArmFpMode();
  311. InsertScalar16(context, op.Vd, op.T, res);
  312. }
  313. else
  314. {
  315. // Convert from half.
  316. Operand src = ExtractScalar16(context, op.Vm, op.T);
  317. MethodInfo method = op.Size == 1
  318. ? typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert))
  319. : typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert));
  320. context.ExitArmFpMode();
  321. context.StoreToContext();
  322. Operand res = context.Call(method, src);
  323. context.LoadFromContext();
  324. context.EnterArmFpMode();
  325. InsertScalar(context, op.Vd, res);
  326. }
  327. }
  328. }
  329. // VRINTA/M/N/P (floating-point).
  330. public static void Vrint_RM(ArmEmitterContext context)
  331. {
  332. OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
  333. OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
  334. int rm = op.Opc2 & 3;
  335. if (Optimizations.UseAdvSimd)
  336. {
  337. Intrinsic inst = rm switch {
  338. 0b00 => Intrinsic.Arm64FrintaS,
  339. 0b01 => Intrinsic.Arm64FrintnS,
  340. 0b10 => Intrinsic.Arm64FrintpS,
  341. 0b11 => Intrinsic.Arm64FrintmS,
  342. _ => throw new ArgumentOutOfRangeException(nameof(rm))
  343. };
  344. InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
  345. }
  346. else if (Optimizations.UseSse41)
  347. {
  348. EmitScalarUnaryOpSimd32(context, (m) =>
  349. {
  350. FPRoundingMode roundMode = RMToRoundMode(rm);
  351. if (roundMode != FPRoundingMode.ToNearestAway)
  352. {
  353. Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
  354. return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
  355. }
  356. else
  357. {
  358. return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: true);
  359. }
  360. });
  361. }
  362. else
  363. {
  364. Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
  365. switch (rm)
  366. {
  367. case 0b00: // Away
  368. toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
  369. break;
  370. case 0b01: // Nearest
  371. toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
  372. break;
  373. case 0b10: // Towards positive infinity
  374. toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
  375. break;
  376. case 0b11: // Towards negative infinity
  377. toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
  378. break;
  379. }
  380. InsertScalar(context, op.Vd, toConvert);
  381. }
  382. }
  383. // VRINTA (vector).
  384. public static void Vrinta_V(ArmEmitterContext context)
  385. {
  386. if (Optimizations.UseAdvSimd)
  387. {
  388. InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS);
  389. }
  390. else
  391. {
  392. EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
  393. }
  394. }
  395. // VRINTM (vector).
  396. public static void Vrintm_V(ArmEmitterContext context)
  397. {
  398. if (Optimizations.UseAdvSimd)
  399. {
  400. InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS);
  401. }
  402. else if (Optimizations.UseSse2)
  403. {
  404. EmitVectorUnaryOpSimd32(context, (m) =>
  405. {
  406. return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity)));
  407. });
  408. }
  409. else
  410. {
  411. EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m));
  412. }
  413. }
  414. // VRINTN (vector).
  415. public static void Vrintn_V(ArmEmitterContext context)
  416. {
  417. if (Optimizations.UseAdvSimd)
  418. {
  419. InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS);
  420. }
  421. else if (Optimizations.UseSse2)
  422. {
  423. EmitVectorUnaryOpSimd32(context, (m) =>
  424. {
  425. return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
  426. });
  427. }
  428. else
  429. {
  430. EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m));
  431. }
  432. }
  433. // VRINTP (vector).
  434. public static void Vrintp_V(ArmEmitterContext context)
  435. {
  436. if (Optimizations.UseAdvSimd)
  437. {
  438. InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS);
  439. }
  440. else if (Optimizations.UseSse2)
  441. {
  442. EmitVectorUnaryOpSimd32(context, (m) =>
  443. {
  444. return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity)));
  445. });
  446. }
  447. else
  448. {
  449. EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m));
  450. }
  451. }
  452. // VRINTZ (floating-point).
  453. public static void Vrint_Z(ArmEmitterContext context)
  454. {
  455. OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
  456. if (Optimizations.UseAdvSimd)
  457. {
  458. InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS);
  459. }
  460. else if (Optimizations.UseSse2)
  461. {
  462. EmitScalarUnaryOpSimd32(context, (m) =>
  463. {
  464. Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
  465. return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero)));
  466. });
  467. }
  468. else
  469. {
  470. EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Truncate), op1));
  471. }
  472. }
  473. // VRINTX (floating-point).
  474. public static void Vrintx_S(ArmEmitterContext context)
  475. {
  476. if (Optimizations.UseAdvSimd)
  477. {
  478. InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintxS);
  479. }
  480. else
  481. {
  482. EmitScalarUnaryOpF32(context, (op1) =>
  483. {
  484. return EmitRoundByRMode(context, op1);
  485. });
  486. }
  487. }
  488. private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
  489. {
  490. Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
  491. if (signed)
  492. {
  493. return context.ConvertToFP(type, value);
  494. }
  495. else
  496. {
  497. return context.ConvertToFPUI(type, value);
  498. }
  499. }
  500. private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
  501. {
  502. // A port of the similar round function in InstEmitSimdCvt.
  503. OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
  504. bool doubleSize = (op.Size & 1) != 0;
  505. int shift = doubleSize ? 1 : 2;
  506. Operand n = GetVecA32(op.Vm >> shift);
  507. n = EmitSwapScalar(context, n, op.Vm, doubleSize);
  508. if (!doubleSize)
  509. {
  510. Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
  511. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
  512. if (roundMode != FPRoundingMode.ToNearestAway)
  513. {
  514. nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
  515. }
  516. else
  517. {
  518. nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
  519. }
  520. Operand zero = context.VectorZero();
  521. Operand nCmp;
  522. Operand nIntOrLong2 = default;
  523. if (!signed)
  524. {
  525. nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
  526. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
  527. }
  528. int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648)
  529. Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
  530. Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
  531. if (!signed)
  532. {
  533. nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
  534. nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
  535. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
  536. nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
  537. }
  538. nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
  539. Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
  540. Operand dRes;
  541. if (signed)
  542. {
  543. dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
  544. }
  545. else
  546. {
  547. dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
  548. dRes = context.Add(dRes, nIntOrLong);
  549. }
  550. InsertScalar(context, op.Vd, dRes);
  551. }
  552. else
  553. {
  554. Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
  555. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
  556. if (roundMode != FPRoundingMode.ToNearestAway)
  557. {
  558. nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
  559. }
  560. else
  561. {
  562. nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
  563. }
  564. Operand zero = context.VectorZero();
  565. Operand nCmp;
  566. Operand nIntOrLong2 = default;
  567. if (!signed)
  568. {
  569. nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
  570. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
  571. }
  572. long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)
  573. Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
  574. Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
  575. if (!signed)
  576. {
  577. nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
  578. nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
  579. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
  580. nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
  581. }
  582. nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
  583. Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
  584. nLong = context.ConvertI64ToI32(nLong);
  585. Operand dRes;
  586. if (signed)
  587. {
  588. dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
  589. }
  590. else
  591. {
  592. dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
  593. dRes = context.Add(dRes, nIntOrLong);
  594. }
  595. InsertScalar(context, op.Vd, dRes);
  596. }
  597. }
  598. private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
  599. {
  600. OpCode32Simd op = (OpCode32Simd)context.CurrOp;
  601. EmitVectorUnaryOpSimd32(context, (n) =>
  602. {
  603. int sizeF = op.Size & 1;
  604. if (sizeF == 0)
  605. {
  606. Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
  607. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
  608. nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
  609. Operand zero = context.VectorZero();
  610. Operand nCmp;
  611. if (!signed)
  612. {
  613. nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
  614. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
  615. }
  616. Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
  617. Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
  618. Operand nInt2 = default;
  619. if (!signed)
  620. {
  621. nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
  622. nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
  623. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
  624. nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
  625. }
  626. nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
  627. if (signed)
  628. {
  629. return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
  630. }
  631. else
  632. {
  633. Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
  634. return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
  635. }
  636. }
  637. else /* if (sizeF == 1) */
  638. {
  639. Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
  640. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
  641. nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
  642. Operand zero = context.VectorZero();
  643. Operand nCmp;
  644. if (!signed)
  645. {
  646. nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
  647. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
  648. }
  649. Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
  650. Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
  651. Operand nLong2 = default;
  652. if (!signed)
  653. {
  654. nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
  655. nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
  656. nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
  657. nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
  658. }
  659. nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
  660. if (signed)
  661. {
  662. return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
  663. }
  664. else
  665. {
  666. Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
  667. return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
  668. }
  669. }
  670. });
  671. }
  672. }
  673. }