// InstEmitSimdCvt32.cs
  1. using ARMeilleure.Decoders;
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.State;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using System.Diagnostics;
  7. using static ARMeilleure.Instructions.InstEmitHelper;
  8. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  9. using static ARMeilleure.Instructions.InstEmitSimdHelper32;
  10. using static ARMeilleure.IntermediateRepresentation.OperandHelper;
  11. namespace ARMeilleure.Instructions
  12. {
  13. static partial class InstEmit32
  14. {
  15. private static int FlipVdBits(int vd, bool lowBit)
  16. {
  17. if (lowBit)
  18. {
  19. // Move the low bit to the top.
  20. return ((vd & 0x1) << 4) | (vd >> 1);
  21. }
  22. else
  23. {
  24. // Move the high bit to the bottom.
  25. return ((vd & 0xf) << 1) | (vd >> 4);
  26. }
  27. }
  28. private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned)
  29. {
  30. if (op1.Type == OperandType.FP64)
  31. {
  32. if (unsigned)
  33. {
  34. return context.Call(new _U32_F64(SoftFallback.SatF64ToU32), op1);
  35. }
  36. else
  37. {
  38. return context.Call(new _S32_F64(SoftFallback.SatF64ToS32), op1);
  39. }
  40. }
  41. else
  42. {
  43. if (unsigned)
  44. {
  45. return context.Call(new _U32_F32(SoftFallback.SatF32ToU32), op1);
  46. }
  47. else
  48. {
  49. return context.Call(new _S32_F32(SoftFallback.SatF32ToS32), op1);
  50. }
  51. }
  52. }
  53. public static void Vcvt_V(ArmEmitterContext context)
  54. {
  55. OpCode32Simd op = (OpCode32Simd)context.CurrOp;
  56. bool unsigned = (op.Opc & 1) != 0;
  57. bool toInteger = (op.Opc & 2) != 0;
  58. OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64;
  59. if (toInteger)
  60. {
  61. if (Optimizations.UseSse41)
  62. {
  63. EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
  64. }
  65. else
  66. {
  67. EmitVectorUnaryOpF32(context, (op1) =>
  68. {
  69. return EmitSaturateFloatToInt(context, op1, unsigned);
  70. });
  71. }
  72. }
  73. else
  74. {
  75. if (Optimizations.UseSse2)
  76. {
  77. EmitVectorUnaryOpSimd32(context, (n) =>
  78. {
  79. if (unsigned)
  80. {
  81. Operand mask = X86GetAllElements(context, 0x47800000);
  82. Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
  83. res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
  84. res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
  85. Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
  86. res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
  87. res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
  88. return context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
  89. }
  90. else
  91. {
  92. return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
  93. }
  94. });
  95. }
  96. else
  97. {
  98. if (unsigned)
  99. {
  100. EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
  101. }
  102. else
  103. {
  104. EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
  105. }
  106. }
  107. }
  108. }
  109. public static void Vcvt_FD(ArmEmitterContext context)
  110. {
  111. OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
  112. int vm = op.Vm;
  113. int vd;
  114. if (op.Size == 3)
  115. {
  116. vd = FlipVdBits(op.Vd, false);
  117. // Double to single.
  118. Operand fp = ExtractScalar(context, OperandType.FP64, vm);
  119. Operand res = context.ConvertToFP(OperandType.FP32, fp);
  120. InsertScalar(context, vd, res);
  121. }
  122. else
  123. {
  124. vd = FlipVdBits(op.Vd, true);
  125. // Single to double.
  126. Operand fp = ExtractScalar(context, OperandType.FP32, vm);
  127. Operand res = context.ConvertToFP(OperandType.FP64, fp);
  128. InsertScalar(context, vd, res);
  129. }
  130. }
  131. public static void Vcvt_FI(ArmEmitterContext context)
  132. {
  133. OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
  134. bool toInteger = (op.Opc2 & 0b100) != 0;
  135. OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
  136. if (toInteger)
  137. {
  138. bool unsigned = (op.Opc2 & 1) == 0;
  139. bool roundWithFpscr = op.Opc != 1;
  140. if (!roundWithFpscr && Optimizations.UseSse41)
  141. {
  142. EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
  143. }
  144. else
  145. {
  146. Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
  147. Operand asInteger;
  148. // TODO: Fast Path.
  149. if (roundWithFpscr)
  150. {
  151. if (floatSize == OperandType.FP64)
  152. {
  153. if (unsigned)
  154. {
  155. asInteger = context.Call(new _U32_F64(SoftFallback.DoubleToUInt32), toConvert);
  156. }
  157. else
  158. {
  159. asInteger = context.Call(new _S32_F64(SoftFallback.DoubleToInt32), toConvert);
  160. }
  161. }
  162. else
  163. {
  164. if (unsigned)
  165. {
  166. asInteger = context.Call(new _U32_F32(SoftFallback.FloatToUInt32), toConvert);
  167. }
  168. else
  169. {
  170. asInteger = context.Call(new _S32_F32(SoftFallback.FloatToInt32), toConvert);
  171. }
  172. }
  173. }
  174. else
  175. {
  176. // Round towards zero.
  177. asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
  178. }
  179. InsertScalar(context, op.Vd, asInteger);
  180. }
  181. }
  182. else
  183. {
  184. bool unsigned = op.Opc == 0;
  185. Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm);
  186. Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned);
  187. InsertScalar(context, op.Vd, asFloat);
  188. }
  189. }
  190. public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
  191. {
  192. IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;
  193. Delegate dlg;
  194. if ((op.Size & 1) == 0)
  195. {
  196. dlg = new _F32_F32_MidpointRounding(MathF.Round);
  197. }
  198. else /* if ((op.Size & 1) == 1) */
  199. {
  200. dlg = new _F64_F64_MidpointRounding(Math.Round);
  201. }
  202. return context.Call(dlg, n, Const((int)roundMode));
  203. }
  204. private static FPRoundingMode RMToRoundMode(int rm)
  205. {
  206. FPRoundingMode roundMode;
  207. switch (rm)
  208. {
  209. case 0b01:
  210. roundMode = FPRoundingMode.ToNearest;
  211. break;
  212. case 0b10:
  213. roundMode = FPRoundingMode.TowardsPlusInfinity;
  214. break;
  215. case 0b11:
  216. roundMode = FPRoundingMode.TowardsMinusInfinity;
  217. break;
  218. default:
  219. throw new ArgumentOutOfRangeException(nameof(rm));
  220. }
  221. return roundMode;
  222. }
  223. public static void Vcvt_R(ArmEmitterContext context)
  224. {
  225. OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
  226. OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
  227. bool unsigned = (op.Opc & 1) == 0;
  228. int rm = op.Opc2 & 3;
  229. if (Optimizations.UseSse41 && rm != 0b00)
  230. {
  231. EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
  232. }
  233. else
  234. {
  235. Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
  236. switch (rm)
  237. {
  238. case 0b00: // Away
  239. toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
  240. break;
  241. case 0b01: // Nearest
  242. toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
  243. break;
  244. case 0b10: // Towards positive infinity
  245. toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert);
  246. break;
  247. case 0b11: // Towards negative infinity
  248. toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert);
  249. break;
  250. }
  251. Operand asInteger;
  252. asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
  253. InsertScalar(context, op.Vd, asInteger);
  254. }
  255. }
  256. public static void Vrint_RM(ArmEmitterContext context)
  257. {
  258. OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
  259. OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
  260. int rm = op.Opc2 & 3;
  261. if (Optimizations.UseSse2 && rm != 0b00)
  262. {
  263. EmitScalarUnaryOpSimd32(context, (m) =>
  264. {
  265. Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
  266. FPRoundingMode roundMode = RMToRoundMode(rm);
  267. return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
  268. });
  269. }
  270. else
  271. {
  272. Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
  273. switch (rm)
  274. {
  275. case 0b00: // Away
  276. toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
  277. break;
  278. case 0b01: // Nearest
  279. toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
  280. break;
  281. case 0b10: // Towards positive infinity
  282. toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert);
  283. break;
  284. case 0b11: // Towards negative infinity
  285. toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert);
  286. break;
  287. }
  288. InsertScalar(context, op.Vd, toConvert);
  289. }
  290. }
  291. public static void Vrint_Z(ArmEmitterContext context)
  292. {
  293. IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
  294. if (Optimizations.UseSse2)
  295. {
  296. EmitScalarUnaryOpSimd32(context, (m) =>
  297. {
  298. Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
  299. return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero)));
  300. });
  301. }
  302. else
  303. {
  304. EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1));
  305. }
  306. }
  307. private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
  308. {
  309. Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
  310. if (signed)
  311. {
  312. return context.ConvertToFP(type, value);
  313. }
  314. else
  315. {
  316. return context.ConvertToFPUI(type, value);
  317. }
  318. }
        /// <summary>
        /// Emits an SSE4.1 sequence converting a scalar float/double to a saturated
        /// 32-bit integer with the given rounding mode.
        /// The sequence: zero NaN inputs (ordered self-compare mask), round, clamp
        /// negatives to zero for unsigned, convert, then fix up overflow so results
        /// saturate instead of producing the x86 "integer indefinite" value.
        /// </summary>
        /// <param name="context">Emitter context.</param>
        /// <param name="roundMode">Rounding mode baked into ROUNDSS/ROUNDSD.</param>
        /// <param name="signed">True for a signed result, false for unsigned.</param>
        private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
        {
            // A port of the similar round function in InstEmitSimdCvt.
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
            bool doubleSize = (op.Size & 1) != 0;
            // S registers pack 4 per vector, D registers 2 per vector.
            int shift = doubleSize ? 1 : 2;
            Operand n = GetVecA32(op.Vm >> shift);
            // Move the selected scalar into the low lane.
            n = EmitSwapScalar(context, n, op.Vm, doubleSize);
            if (!doubleSize)
            {
                // OrderedQ self-compare is all-ones unless n is NaN; AND zeroes NaN inputs.
                Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
                nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
                nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
                Operand zero = context.VectorZero();
                Operand nCmp;
                Operand nIntOrLong2 = null;
                if (!signed)
                {
                    // Unsigned: clamp negative values to zero (keep only nRes > 0).
                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                }
                int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648)
                Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
                Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
                if (!signed)
                {
                    // Convert the upper half of the unsigned range by biasing down
                    // 2^31 first, clamping the now-negative small values to zero.
                    nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                    nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
                }
                // All-ones mask where the (possibly biased) value still overflows.
                nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
                // Raw move of the mask's low lane into a GPR (ARMeilleure pseudo-op;
                // presumably lowered to MOVD — TODO confirm).
                Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
                Operand dRes;
                if (signed)
                {
                    // CVTSS2SI yields 0x80000000 on overflow; XOR with the all-ones
                    // overflow mask flips it to 0x7FFFFFFF (saturate to int.MaxValue).
                    dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
                }
                else
                {
                    // Combine the low-range and biased high-range conversions;
                    // the XOR applies the overflow saturation to the biased half.
                    dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
                    dRes = context.Add(dRes, nIntOrLong);
                }
                InsertScalar(context, op.Vd, dRes);
            }
            else
            {
                // Same algorithm as above, in double precision.
                Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
                nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
                nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
                Operand zero = context.VectorZero();
                Operand nCmp;
                Operand nIntOrLong2 = null;
                if (!signed)
                {
                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                }
                long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)
                Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
                Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
                if (!signed)
                {
                    nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                    nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
                }
                nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
                Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
                // Only the low 32 bits of the overflow mask are needed for the fixup.
                nLong = context.ConvertI64ToI32(nLong);
                Operand dRes;
                if (signed)
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
                }
                else
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
                    dRes = context.Add(dRes, nIntOrLong);
                }
                InsertScalar(context, op.Vd, dRes);
            }
        }
        /// <summary>
        /// Emits an SSE4.1 sequence converting each vector element from float/double
        /// to a saturated integer with the given rounding mode.
        /// Per element: zero NaNs (ordered self-compare mask), round, clamp
        /// negatives to zero for unsigned, convert, then XOR with the overflow mask
        /// so out-of-range results saturate instead of producing the x86
        /// "integer indefinite" value.
        /// </summary>
        /// <param name="context">Emitter context.</param>
        /// <param name="roundMode">Rounding mode baked into ROUNDPS/ROUNDPD.</param>
        /// <param name="signed">True for signed results, false for unsigned.</param>
        private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
            EmitVectorUnaryOpSimd32(context, (n) =>
            {
                // Low size bit selects element precision: 0 = single, 1 = double.
                int sizeF = op.Size & 1;
                if (sizeF == 0)
                {
                    // OrderedQ self-compare is all-ones except on NaN lanes; AND zeroes NaNs.
                    Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
                    nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
                    Operand zero = context.VectorZero();
                    Operand nCmp;
                    if (!signed)
                    {
                        // Unsigned: clamp negative lanes to zero (keep only nRes > 0).
                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                    }
                    Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
                    Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
                    Operand nInt2 = null;
                    if (!signed)
                    {
                        // Convert the upper half of the unsigned range by biasing
                        // down 2^31 first, clamping now-negative lanes to zero.
                        nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                        nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
                    }
                    // All-ones mask on lanes that still overflow after rounding/biasing.
                    nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
                    if (signed)
                    {
                        // CVTPS2DQ yields 0x80000000 on overflow; XOR with the
                        // overflow mask flips it to 0x7FFFFFFF (saturate).
                        return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
                    }
                    else
                    {
                        // Combine the low-range and biased high-range conversions.
                        Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
                        return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
                    }
                }
                else /* if (sizeF == 1) */
                {
                    // Same algorithm in double precision, producing 64-bit lanes.
                    Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
                    nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
                    Operand zero = context.VectorZero();
                    Operand nCmp;
                    if (!signed)
                    {
                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                    }
                    Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
                    Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
                    Operand nLong2 = null;
                    if (!signed)
                    {
                        nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                        nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
                    }
                    nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
                    if (signed)
                    {
                        return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
                    }
                    else
                    {
                        Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
                        return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
                    }
                }
            });
        }
  477. }
  478. }