InstEmitSimdCvt32.cs 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712
  1. using ARMeilleure.Decoders;
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.State;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using System.Diagnostics;
  7. using System.Reflection;
  8. using static ARMeilleure.Instructions.InstEmitHelper;
  9. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  10. using static ARMeilleure.Instructions.InstEmitSimdHelper32;
  11. using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  12. namespace ARMeilleure.Instructions
  13. {
  14. static partial class InstEmit32
  15. {
  // Rotates a register index by one bit position within a 5-bit field.
  //   lowBit == true : bit 0 is moved to bit 4, the remaining bits shift down by one.
  //   lowBit == false: bit 4 is moved to bit 0, the low four bits shift up by one.
  // Bits above the ones named here are not masked off, so callers are expected to
  // pass a value that already fits the field.
  private static int FlipVdBits(int vd, bool lowBit)
  {
      return lowBit
          ? ((vd & 0x1) << 4) | (vd >> 1)
          : ((vd & 0xf) << 1) | (vd >> 4);
  }
  // Emits a call to the SoftFallback saturating float -> 32-bit integer routine that
  // matches the operand's floating-point width (FP64 vs. anything else, treated as FP32)
  // and the requested signedness. Returns the operand holding the call result.
  private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned)
  {
      string helperName;

      if (op1.Type == OperandType.FP64)
      {
          helperName = unsigned
              ? nameof(SoftFallback.SatF64ToU32)
              : nameof(SoftFallback.SatF64ToS32);
      }
      else
      {
          helperName = unsigned
              ? nameof(SoftFallback.SatF32ToU32)
              : nameof(SoftFallback.SatF32ToS32);
      }

      MethodInfo helper = typeof(SoftFallback).GetMethod(helperName);

      return context.Call(helper, op1);
  }
  // Vector VCVT between integer and floating-point elements.
  // Opc bit 0 selects the unsigned variant, Opc bit 1 selects float -> integer.
  public static void Vcvt_V(ArmEmitterContext context)
  {
      OpCode32Simd op = (OpCode32Simd)context.CurrOp;

      bool unsigned = (op.Opc & 1) != 0;
      bool toInteger = (op.Opc & 2) != 0;
      OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64;

      if (toInteger)
      {
          // Float -> integer always rounds towards zero here.
          if (Optimizations.UseSse41)
          {
              EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
          }
          else
          {
              EmitVectorUnaryOpF32(context, (element) => EmitSaturateFloatToInt(context, element, unsigned));
          }

          return;
      }

      if (!Optimizations.UseSse2)
      {
          // Software path: widen each element (zero- or sign-extended) and convert it.
          if (unsigned)
          {
              EmitVectorUnaryOpZx32(context, (element) => EmitFPConvert(context, element, floatSize, false));
          }
          else
          {
              EmitVectorUnaryOpSx32(context, (element) => EmitFPConvert(context, element, floatSize, true));
          }

          return;
      }

      EmitVectorUnaryOpSimd32(context, (n) =>
      {
          if (!unsigned)
          {
              return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
          }

          // CVTDQ2PS only handles signed inputs, so the unsigned value is split into
          // its upper and lower 16-bit halves, each half is converted on its own, and
          // the upper half is scaled by 0x47800000 (the bit pattern of 65536.0f)
          // before the two are added back together.
          Operand scale = X86GetAllElements(context, 0x47800000);

          Operand upper = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
          upper = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, upper);
          upper = context.AddIntrinsic(Intrinsic.X86Mulps, upper, scale);

          Operand lower = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
          lower = context.AddIntrinsic(Intrinsic.X86Psrld, lower, Const(16));
          lower = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, lower);

          return context.AddIntrinsic(Intrinsic.X86Addps, upper, lower);
      });
  }
  // Scalar VCVT between single and double precision.
  // Size == 3 converts double -> single, any other size converts single -> double.
  // The destination index is re-encoded with FlipVdBits because the destination has
  // the opposite width from the source.
  // NOTE(review): presumably op.Vd was decoded assuming the source width - confirm
  // against the OpCode32SimdS decoder.
  public static void Vcvt_FD(ArmEmitterContext context)
  {
      OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

      bool doubleToSingle = op.Size == 3;

      OperandType sourceType = doubleToSingle ? OperandType.FP64 : OperandType.FP32;
      OperandType resultType = doubleToSingle ? OperandType.FP32 : OperandType.FP64;

      int vd = FlipVdBits(op.Vd, !doubleToSingle);

      Operand source = ExtractScalar(context, sourceType, op.Vm);
      Operand converted = context.ConvertToFP(resultType, source);

      InsertScalar(context, vd, converted);
  }
  // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point).
  // Opc2 bit 2 selects the float -> integer direction.
  public static void Vcvt_FI(ArmEmitterContext context)
  {
      OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;

      OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

      if ((op.Opc2 & 0b100) == 0)
      {
          // Integer -> floating-point; Opc == 0 means the source is unsigned.
          bool signedSource = op.Opc != 0;

          Operand integer = ExtractScalar(context, OperandType.I32, op.Vm);
          Operand asFloat = EmitFPConvert(context, integer, floatSize, signedSource);

          InsertScalar(context, op.Vd, asFloat);

          return;
      }

      // Floating-point -> integer; Opc2 bit 0 clear means the result is unsigned.
      bool unsigned = (op.Opc2 & 1) == 0;

      // Opc == 1 is the truncating form; every other value first rounds the input
      // through EmitRoundByRMode.
      bool truncating = op.Opc == 1;

      if (truncating && Optimizations.UseSse41)
      {
          EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);

          return;
      }

      Operand value = ExtractScalar(context, floatSize, op.Vm);

      // TODO: Fast Path.
      if (!truncating)
      {
          value = EmitRoundByRMode(context, value);
      }

      // Round towards zero.
      Operand asInteger = EmitSaturateFloatToInt(context, value, unsigned);

      InsertScalar(context, op.Vd, asInteger);
  }
  // Emits a call to MathF.Round(float, MidpointRounding) when bit 0 of the current
  // opcode's Size is clear, or Math.Round(double, MidpointRounding) when it is set.
  // The rounding mode is passed to the callee as an integer constant.
  private static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
  {
      IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;

      bool isSingle = (op.Size & 1) == 0;

      Type mathType = isSingle ? typeof(MathF) : typeof(Math);
      Type valueType = isSingle ? typeof(float) : typeof(double);

      MethodInfo roundMethod = mathType.GetMethod(nameof(Math.Round), new Type[] { valueType, typeof(MidpointRounding) });

      return context.Call(roundMethod, n, Const((int)roundMode));
  }
  // Translates a 2-bit rounding-mode field into the matching FPRoundingMode.
  // Throws ArgumentOutOfRangeException for any value outside 0..3.
  private static FPRoundingMode RMToRoundMode(int rm)
  {
      switch (rm)
      {
          case 0b00: return FPRoundingMode.ToNearestAway;
          case 0b01: return FPRoundingMode.ToNearest;
          case 0b10: return FPRoundingMode.TowardsPlusInfinity;
          case 0b11: return FPRoundingMode.TowardsMinusInfinity;
      }

      throw new ArgumentOutOfRangeException(nameof(rm));
  }
  // VCVTA/M/N/P (floating-point).
  // Float -> integer conversion whose rounding mode comes from the low two bits of
  // Opc2. Opc == 0 selects an unsigned result.
  public static void Vcvt_RM(ArmEmitterContext context)
  {
      OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1).

      OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

      bool unsigned = op.Opc == 0;
      int rm = op.Opc2 & 3;

      if (Optimizations.UseSse41)
      {
          EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);

          return;
      }

      // Software path: round with a managed math call, then saturate to an integer.
      Operand rounded = ExtractScalar(context, floatSize, op.Vm);

      if (rm == 0b00)
      {
          // Away
          rounded = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, rounded);
      }
      else if (rm == 0b01)
      {
          // Nearest
          rounded = EmitRoundMathCall(context, MidpointRounding.ToEven, rounded);
      }
      else if (rm == 0b10)
      {
          // Towards positive infinity
          rounded = EmitUnaryMathCall(context, nameof(Math.Ceiling), rounded);
      }
      else if (rm == 0b11)
      {
          // Towards negative infinity
          rounded = EmitUnaryMathCall(context, nameof(Math.Floor), rounded);
      }

      Operand asInteger = EmitSaturateFloatToInt(context, rounded, unsigned);

      InsertScalar(context, op.Vd, asInteger);
  }
  // Conversion between half precision and single/double precision, reading or
  // writing the 16-bit half of the register selected by op.T.
  // op.Op == true converts to half, otherwise from half; Size == 1 means the wide
  // side is double precision, otherwise single precision.
  public static void Vcvt_TB(ArmEmitterContext context)
  {
      OpCode32SimdCvtTB op = (OpCode32SimdCvtTB)context.CurrOp;

      bool toHalf = op.Op;
      bool wideIsDouble = op.Size == 1;

      if (Optimizations.UseF16c)
      {
          Debug.Assert(!Optimizations.ForceLegacySse);

          if (toHalf)
          {
              Operand value = ExtractScalar(context, wideIsDouble ? OperandType.FP64 : OperandType.FP32, op.Vm);

              if (wideIsDouble)
              {
                  // VCVTPS2PH takes single precision, so narrow the double first.
                  value = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), value);
              }

              value = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, value, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
              value = context.VectorExtract16(value, 0);

              InsertScalar16(context, op.Vd, op.T, value);
          }
          else
          {
              Operand value = context.VectorCreateScalar(ExtractScalar16(context, op.Vm, op.T));

              value = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, value);

              if (wideIsDouble)
              {
                  // VCVTPH2PS yields single precision, so widen it afterwards.
                  value = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), value);
              }

              value = context.VectorExtract(wideIsDouble ? OperandType.I64 : OperandType.I32, value, 0);

              InsertScalar(context, op.Vd, value);
          }

          return;
      }

      // Software path: call the matching SoftFloat converter. The call is bracketed
      // by StoreToContext/LoadFromContext exactly as in the hardware-independent
      // original.
      Operand source;
      MethodInfo converter;

      if (toHalf)
      {
          // Convert to half.
          source = ExtractScalar(context, wideIsDouble ? OperandType.FP64 : OperandType.FP32, op.Vm);

          converter = wideIsDouble
              ? typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert))
              : typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert));
      }
      else
      {
          // Convert from half.
          source = ExtractScalar16(context, op.Vm, op.T);

          converter = wideIsDouble
              ? typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert))
              : typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert));
      }

      context.StoreToContext();
      Operand converted = context.Call(converter, source);
      context.LoadFromContext();

      if (toHalf)
      {
          InsertScalar16(context, op.Vd, op.T, converted);
      }
      else
      {
          InsertScalar(context, op.Vd, converted);
      }
  }
  // VRINTA/M/N/P (floating-point).
  // Rounds a scalar to an integral floating-point value using the rounding mode
  // encoded in the low two bits of Opc2.
  public static void Vrint_RM(ArmEmitterContext context)
  {
      OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

      OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

      int rm = op.Opc2 & 3;

      if (Optimizations.UseSse41)
      {
          EmitScalarUnaryOpSimd32(context, (m) =>
          {
              FPRoundingMode roundMode = RMToRoundMode(rm);

              // Ties-away is not routed through X86GetRoundControl; it uses the
              // dedicated helper instead.
              if (roundMode == FPRoundingMode.ToNearestAway)
              {
                  return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: true);
              }

              Intrinsic roundInst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;

              return context.AddIntrinsic(roundInst, m, Const(X86GetRoundControl(roundMode)));
          });

          return;
      }

      // Software path: round with a managed math call.
      Operand rounded = ExtractScalar(context, floatSize, op.Vm);

      if (rm == 0b00)
      {
          // Away
          rounded = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, rounded);
      }
      else if (rm == 0b01)
      {
          // Nearest
          rounded = EmitRoundMathCall(context, MidpointRounding.ToEven, rounded);
      }
      else if (rm == 0b10)
      {
          // Towards positive infinity
          rounded = EmitUnaryMathCall(context, nameof(Math.Ceiling), rounded);
      }
      else if (rm == 0b11)
      {
          // Towards negative infinity
          rounded = EmitUnaryMathCall(context, nameof(Math.Floor), rounded);
      }

      InsertScalar(context, op.Vd, rounded);
  }
  // VRINTA (vector).
  // Rounds to nearest with ties away from zero: through the SSE4.1 helper when
  // available, otherwise through a per-element Math/MathF.Round call.
  public static void Vrinta_V(ArmEmitterContext context)
  {
      if (!Optimizations.UseSse41)
      {
          EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));

          return;
      }

      EmitVectorUnaryOpSimd32(context, (m) => EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: false));
  }
  // VRINTM (vector).
  // Rounds towards minus infinity: with ROUNDPS on the fast path, otherwise with a
  // per-element Floor call.
  public static void Vrintm_V(ArmEmitterContext context)
  {
      // ROUNDPS is an SSE4.1 instruction, so SSE2 support alone is not a sufficient
      // guard for the fast path; hosts without SSE4.1 must take the software path.
      if (Optimizations.UseSse41)
      {
          EmitVectorUnaryOpSimd32(context, (m) =>
          {
              return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity)));
          });
      }
      else
      {
          EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m));
      }
  }
  // VRINTN (vector).
  // Rounds to nearest with ties to even: with ROUNDPS on the fast path, otherwise
  // with a per-element Math/MathF.Round call.
  public static void Vrintn_V(ArmEmitterContext context)
  {
      // ROUNDPS is an SSE4.1 instruction, so SSE2 support alone is not a sufficient
      // guard for the fast path; hosts without SSE4.1 must take the software path.
      if (Optimizations.UseSse41)
      {
          EmitVectorUnaryOpSimd32(context, (m) =>
          {
              return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
          });
      }
      else
      {
          EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m));
      }
  }
  // VRINTP (vector).
  // Rounds towards plus infinity: with ROUNDPS on the fast path, otherwise with a
  // per-element Ceiling call.
  public static void Vrintp_V(ArmEmitterContext context)
  {
      // ROUNDPS is an SSE4.1 instruction, so SSE2 support alone is not a sufficient
      // guard for the fast path; hosts without SSE4.1 must take the software path.
      if (Optimizations.UseSse41)
      {
          EmitVectorUnaryOpSimd32(context, (m) =>
          {
              return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity)));
          });
      }
      else
      {
          EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m));
      }
  }
  // VRINTZ (floating-point).
  // Rounds a scalar towards zero: with ROUNDSS/ROUNDSD on the fast path (chosen by
  // bit 0 of Size), otherwise with a Truncate call.
  public static void Vrint_Z(ArmEmitterContext context)
  {
      OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

      // ROUNDSS and ROUNDSD are SSE4.1 instructions, so SSE2 support alone is not a
      // sufficient guard for the fast path; hosts without SSE4.1 must take the
      // software path.
      if (Optimizations.UseSse41)
      {
          EmitScalarUnaryOpSimd32(context, (m) =>
          {
              Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;

              return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero)));
          });
      }
      else
      {
          EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Truncate), op1));
      }
  }
  // VRINTX (floating-point).
  // Applies EmitRoundByRMode to the scalar operand.
  public static void Vrintx_S(ArmEmitterContext context)
  {
      EmitScalarUnaryOpF32(context, (op1) => EmitRoundByRMode(context, op1));
  }
  // Emits an integer -> floating-point conversion of `value` to `type`, treating the
  // integer as signed or unsigned as requested. The input must be an I32 or I64
  // operand (checked in debug builds only).
  private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
  {
      Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);

      return signed
          ? context.ConvertToFP(type, value)
          : context.ConvertToFPUI(type, value);
  }
  // Emits an SSE4.1 scalar float/double -> 32-bit integer conversion with the given
  // rounding mode, saturating out-of-range inputs and mapping NaN to zero.
  // Bit 0 of op.Size selects double precision. The result is written to op.Vd.
  private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
  {
      // A port of the similar round function in InstEmitSimdCvt.
      OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;

      // Pick the single- or double-precision flavour of every instruction once.
      Intrinsic cmpInst = isDouble ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmpss;
      Intrinsic roundInst = isDouble ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss;
      Intrinsic subInst = isDouble ? Intrinsic.X86Subsd : Intrinsic.X86Subss;
      Intrinsic toIntInst = isDouble ? Intrinsic.X86Cvtsd2si : Intrinsic.X86Cvtss2si;

      Operand source = GetVecA32(op.Vm >> (isDouble ? 1 : 2));
      source = EmitSwapScalar(context, source, op.Vm, isDouble);

      // An ordered self-compare is all-zero for NaN, so the AND turns NaN into 0.
      Operand value = context.AddIntrinsic(cmpInst, source, source, Const((int)CmpCondition.OrderedQ));
      value = context.AddIntrinsic(Intrinsic.X86Pand, value, source);

      if (roundMode == FPRoundingMode.ToNearestAway)
      {
          value = EmitSse41RoundToNearestWithTiesToAwayOpF(context, value, scalar: true);
      }
      else
      {
          value = context.AddIntrinsic(roundInst, value, Const(X86GetRoundControl(roundMode)));
      }

      Operand zero = context.VectorZero();

      if (!signed)
      {
          // Unsigned results cannot be negative: keep the value only when it is > 0.
          Operand isPositive = context.AddIntrinsic(cmpInst, value, zero, Const((int)CmpCondition.NotLessThanOrEqual));
          value = context.AddIntrinsic(Intrinsic.X86Pand, value, isPositive);
      }

      // 2^31 encoded in the source precision.
      Operand limit;

      if (isDouble)
      {
          long limitBits = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)
          limit = X86GetScalar(context, limitBits);
      }
      else
      {
          int limitBits = 0x4F000000; // 2.14748365E9f (2147483648)
          limit = X86GetScalar(context, limitBits);
      }

      Operand firstPart = context.AddIntrinsicInt(toIntInst, value);
      Operand secondPart = default;

      if (!signed)
      {
          // Convert whatever lies above 2^31 separately; the two parts are summed
          // at the end.
          value = context.AddIntrinsic(subInst, value, limit);

          Operand isPositive = context.AddIntrinsic(cmpInst, value, zero, Const((int)CmpCondition.NotLessThanOrEqual));
          value = context.AddIntrinsic(Intrinsic.X86Pand, value, isPositive);

          secondPart = context.AddIntrinsicInt(toIntInst, value);
      }

      // All-ones when the (remaining) value is >= 2^31; XOR-ing it into the
      // converted integer flips the overflow result into the saturated maximum.
      value = context.AddIntrinsic(cmpInst, value, limit, Const((int)CmpCondition.NotLessThan));

      Operand overflowMask;

      if (isDouble)
      {
          overflowMask = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, value);
          overflowMask = context.ConvertI64ToI32(overflowMask);
      }
      else
      {
          overflowMask = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, value);
      }

      Operand result;

      if (signed)
      {
          result = context.BitwiseExclusiveOr(firstPart, overflowMask);
      }
      else
      {
          result = context.BitwiseExclusiveOr(secondPart, overflowMask);
          result = context.Add(result, firstPart);
      }

      InsertScalar(context, op.Vd, result);
  }
  // Emits an SSE4.1 packed float/double -> integer conversion with the given
  // rounding mode, saturating out-of-range inputs and mapping NaN to zero.
  // Bit 0 of op.Size selects the double-precision variant.
  private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
  {
      OpCode32Simd op = (OpCode32Simd)context.CurrOp;

      // Single precision -> 32-bit integers.
      Operand ConvertSingles(Operand source)
      {
          // An ordered self-compare is all-zero for NaN, so the AND turns NaN into 0.
          Operand value = context.AddIntrinsic(Intrinsic.X86Cmpps, source, source, Const((int)CmpCondition.OrderedQ));
          value = context.AddIntrinsic(Intrinsic.X86Pand, value, source);

          value = context.AddIntrinsic(Intrinsic.X86Roundps, value, Const(X86GetRoundControl(roundMode)));

          Operand zero = context.VectorZero();

          if (!signed)
          {
              // Unsigned results cannot be negative: keep only elements that are > 0.
              Operand isPositive = context.AddIntrinsic(Intrinsic.X86Cmpps, value, zero, Const((int)CmpCondition.NotLessThanOrEqual));
              value = context.AddIntrinsic(Intrinsic.X86Pand, value, isPositive);
          }

          Operand limit = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)

          Operand firstPart = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, value);
          Operand secondPart = default;

          if (!signed)
          {
              // Convert whatever lies above the limit separately; summed at the end.
              value = context.AddIntrinsic(Intrinsic.X86Subps, value, limit);

              Operand isPositive = context.AddIntrinsic(Intrinsic.X86Cmpps, value, zero, Const((int)CmpCondition.NotLessThanOrEqual));
              value = context.AddIntrinsic(Intrinsic.X86Pand, value, isPositive);

              secondPart = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, value);
          }

          // All-ones in lanes whose (remaining) value is >= the limit.
          value = context.AddIntrinsic(Intrinsic.X86Cmpps, value, limit, Const((int)CmpCondition.NotLessThan));

          if (signed)
          {
              return context.AddIntrinsic(Intrinsic.X86Pxor, firstPart, value);
          }

          Operand flipped = context.AddIntrinsic(Intrinsic.X86Pxor, secondPart, value);

          return context.AddIntrinsic(Intrinsic.X86Paddd, flipped, firstPart);
      }

      // Double precision -> 64-bit integers.
      Operand ConvertDoubles(Operand source)
      {
          // An ordered self-compare is all-zero for NaN, so the AND turns NaN into 0.
          Operand value = context.AddIntrinsic(Intrinsic.X86Cmppd, source, source, Const((int)CmpCondition.OrderedQ));
          value = context.AddIntrinsic(Intrinsic.X86Pand, value, source);

          value = context.AddIntrinsic(Intrinsic.X86Roundpd, value, Const(X86GetRoundControl(roundMode)));

          Operand zero = context.VectorZero();

          if (!signed)
          {
              // Unsigned results cannot be negative: keep only elements that are > 0.
              Operand isPositive = context.AddIntrinsic(Intrinsic.X86Cmppd, value, zero, Const((int)CmpCondition.NotLessThanOrEqual));
              value = context.AddIntrinsic(Intrinsic.X86Pand, value, isPositive);
          }

          Operand limit = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)

          Operand firstPart = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, value, false);
          Operand secondPart = default;

          if (!signed)
          {
              // Convert whatever lies above the limit separately; summed at the end.
              value = context.AddIntrinsic(Intrinsic.X86Subpd, value, limit);

              Operand isPositive = context.AddIntrinsic(Intrinsic.X86Cmppd, value, zero, Const((int)CmpCondition.NotLessThanOrEqual));
              value = context.AddIntrinsic(Intrinsic.X86Pand, value, isPositive);

              secondPart = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, value, false);
          }

          // All-ones in lanes whose (remaining) value is >= the limit.
          value = context.AddIntrinsic(Intrinsic.X86Cmppd, value, limit, Const((int)CmpCondition.NotLessThan));

          if (signed)
          {
              return context.AddIntrinsic(Intrinsic.X86Pxor, firstPart, value);
          }

          Operand flipped = context.AddIntrinsic(Intrinsic.X86Pxor, secondPart, value);

          return context.AddIntrinsic(Intrinsic.X86Paddq, flipped, firstPart);
      }

      EmitVectorUnaryOpSimd32(context, (n) =>
      {
          int sizeF = op.Size & 1;

          return sizeF == 0 ? ConvertSingles(n) : ConvertDoubles(n);
      });
  }
  592. }
  593. }