InstEmitSimdCvt.cs 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158
  1. using ARMeilleure.Decoders;
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.State;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using System.Diagnostics;
  7. using static ARMeilleure.Instructions.InstEmitHelper;
  8. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  9. using static ARMeilleure.IntermediateRepresentation.OperandHelper;
  10. namespace ARMeilleure.Instructions
  11. {
  12. using Func1I = Func<Operand, Operand>;
  13. static partial class InstEmit
  14. {
        /// <summary>
        /// Emits FCVT (scalar): floating-point precision conversion between half,
        /// single and double precision, selected by the type (Size) and opc fields.
        /// Half-precision paths go through the SoftFloat software converters.
        /// </summary>
        public static void Fcvt_S(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            if (op.Size == 0 && op.Opc == 1) // Single -> Double.
            {
                if (Optimizations.UseSse2)
                {
                    Operand n = GetVec(op.Rn);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n);

                    context.Copy(GetVec(op.Rd), res);
                }
                else
                {
                    Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);

                    Operand res = context.ConvertToFP(OperandType.FP64, ne);

                    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
                }
            }
            else if (op.Size == 1 && op.Opc == 0) // Double -> Single.
            {
                if (Optimizations.UseSse2)
                {
                    Operand n = GetVec(op.Rn);

                    Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);

                    context.Copy(GetVec(op.Rd), res);
                }
                else
                {
                    Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);

                    Operand res = context.ConvertToFP(OperandType.FP32, ne);

                    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
                }
            }
            else if (op.Size == 0 && op.Opc == 3) // Single -> Half.
            {
                Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);

                // No hardware path for FP16; call the software converter.
                Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert);

                Operand res = context.Call(dlg, ne);

                res = context.ZeroExtend16(OperandType.I64, res);

                context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
            }
            else if (op.Size == 3 && op.Opc == 0) // Half -> Single.
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);

                Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert);

                Operand res = context.Call(dlg, ne);

                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
            }
            else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
            {
                throw new NotImplementedException("Double-precision to half-precision.");
            }
            else if (op.Size == 3 && op.Opc == 1) // Half -> Double.
            {
                throw new NotImplementedException("Half-precision to double-precision.");
            }
            else // Invalid encoding.
            {
                Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}");
            }
        }
  76. public static void Fcvtas_Gp(ArmEmitterContext context)
  77. {
  78. EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
  79. }
  80. public static void Fcvtau_Gp(ArmEmitterContext context)
  81. {
  82. EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
  83. }
        /// <summary>
        /// Emits FCVTL/FCVTL2 (vector): widens FP16 -> FP32 or FP32 -> FP64, reading
        /// from the lower (FCVTL) or upper (FCVTL2, Simd128) half of the source.
        /// </summary>
        public static void Fcvtl_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int sizeF = op.Size & 1;

            if (Optimizations.UseSse2 && sizeF == 1)
            {
                Operand n = GetVec(op.Rn);

                Operand res;

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    // FCVTL2: move the upper two singles down before widening.
                    res = context.AddIntrinsic(Intrinsic.X86Movhlps, n, n);
                }
                else
                {
                    res = n;
                }

                res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res);

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Operand res = context.VectorZero();

                int elems = 4 >> sizeF;

                // For the "2" variant the source elements start at the upper half.
                int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

                for (int index = 0; index < elems; index++)
                {
                    if (sizeF == 0)
                    {
                        // FP16 -> FP32 has no IR primitive; done in software.
                        Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1);

                        Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert);

                        Operand e = context.Call(dlg, ne);

                        res = context.VectorInsert(res, e, index);
                    }
                    else /* if (sizeF == 1) */
                    {
                        Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index);

                        Operand e = context.ConvertToFP(OperandType.FP64, ne);

                        res = context.VectorInsert(res, e, index);
                    }
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
  127. public static void Fcvtms_Gp(ArmEmitterContext context)
  128. {
  129. EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1));
  130. }
  131. public static void Fcvtmu_Gp(ArmEmitterContext context)
  132. {
  133. EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, op1));
  134. }
  135. public static void Fcvtn_V(ArmEmitterContext context)
  136. {
  137. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  138. int sizeF = op.Size & 1;
  139. if (Optimizations.UseSse2 && sizeF == 1)
  140. {
  141. Operand d = GetVec(op.Rd);
  142. Operand n = GetVec(op.Rn);
  143. Operand res = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());
  144. Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, n);
  145. nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
  146. Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
  147. ? Intrinsic.X86Movlhps
  148. : Intrinsic.X86Movhlps;
  149. res = context.AddIntrinsic(movInst, res, nInt);
  150. context.Copy(GetVec(op.Rd), res);
  151. }
  152. else
  153. {
  154. OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
  155. int elems = 4 >> sizeF;
  156. int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
  157. Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
  158. for (int index = 0; index < elems; index++)
  159. {
  160. Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
  161. if (sizeF == 0)
  162. {
  163. Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert);
  164. Operand e = context.Call(dlg, ne);
  165. e = context.ZeroExtend16(OperandType.I64, e);
  166. res = EmitVectorInsert(context, res, e, part + index, 1);
  167. }
  168. else /* if (sizeF == 1) */
  169. {
  170. Operand e = context.ConvertToFP(OperandType.FP32, ne);
  171. res = context.VectorInsert(res, e, part + index);
  172. }
  173. }
  174. context.Copy(GetVec(op.Rd), res);
  175. }
  176. }
  177. public static void Fcvtns_S(ArmEmitterContext context)
  178. {
  179. if (Optimizations.UseSse41)
  180. {
  181. EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: true);
  182. }
  183. else
  184. {
  185. EmitFcvtn(context, signed: true, scalar: true);
  186. }
  187. }
  188. public static void Fcvtns_V(ArmEmitterContext context)
  189. {
  190. if (Optimizations.UseSse41)
  191. {
  192. EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: false);
  193. }
  194. else
  195. {
  196. EmitFcvtn(context, signed: true, scalar: false);
  197. }
  198. }
  199. public static void Fcvtnu_S(ArmEmitterContext context)
  200. {
  201. if (Optimizations.UseSse41)
  202. {
  203. EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: true);
  204. }
  205. else
  206. {
  207. EmitFcvtn(context, signed: false, scalar: true);
  208. }
  209. }
  210. public static void Fcvtnu_V(ArmEmitterContext context)
  211. {
  212. if (Optimizations.UseSse41)
  213. {
  214. EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: false);
  215. }
  216. else
  217. {
  218. EmitFcvtn(context, signed: false, scalar: false);
  219. }
  220. }
  221. public static void Fcvtps_Gp(ArmEmitterContext context)
  222. {
  223. EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1));
  224. }
  225. public static void Fcvtpu_Gp(ArmEmitterContext context)
  226. {
  227. EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, op1));
  228. }
  229. public static void Fcvtzs_Gp(ArmEmitterContext context)
  230. {
  231. EmitFcvt_s_Gp(context, (op1) => op1);
  232. }
  233. public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
  234. {
  235. EmitFcvtzs_Gp_Fixed(context);
  236. }
  237. public static void Fcvtzs_S(ArmEmitterContext context)
  238. {
  239. if (Optimizations.UseSse41)
  240. {
  241. EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: true);
  242. }
  243. else
  244. {
  245. EmitFcvtz(context, signed: true, scalar: true);
  246. }
  247. }
  248. public static void Fcvtzs_V(ArmEmitterContext context)
  249. {
  250. if (Optimizations.UseSse41)
  251. {
  252. EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false);
  253. }
  254. else
  255. {
  256. EmitFcvtz(context, signed: true, scalar: false);
  257. }
  258. }
  259. public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
  260. {
  261. if (Optimizations.UseSse41)
  262. {
  263. EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false);
  264. }
  265. else
  266. {
  267. EmitFcvtz(context, signed: true, scalar: false);
  268. }
  269. }
  270. public static void Fcvtzu_Gp(ArmEmitterContext context)
  271. {
  272. EmitFcvt_u_Gp(context, (op1) => op1);
  273. }
  274. public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
  275. {
  276. EmitFcvtzu_Gp_Fixed(context);
  277. }
  278. public static void Fcvtzu_S(ArmEmitterContext context)
  279. {
  280. if (Optimizations.UseSse41)
  281. {
  282. EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: true);
  283. }
  284. else
  285. {
  286. EmitFcvtz(context, signed: false, scalar: true);
  287. }
  288. }
  289. public static void Fcvtzu_V(ArmEmitterContext context)
  290. {
  291. if (Optimizations.UseSse41)
  292. {
  293. EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false);
  294. }
  295. else
  296. {
  297. EmitFcvtz(context, signed: false, scalar: false);
  298. }
  299. }
  300. public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
  301. {
  302. if (Optimizations.UseSse41)
  303. {
  304. EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false);
  305. }
  306. else
  307. {
  308. EmitFcvtz(context, signed: false, scalar: false);
  309. }
  310. }
  311. public static void Scvtf_Gp(ArmEmitterContext context)
  312. {
  313. OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
  314. Operand res = GetIntOrZR(context, op.Rn);
  315. if (op.RegisterSize == RegisterSize.Int32)
  316. {
  317. res = context.SignExtend32(OperandType.I64, res);
  318. }
  319. res = EmitFPConvert(context, res, op.Size, signed: true);
  320. context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
  321. }
  322. public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
  323. {
  324. OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
  325. Operand res = GetIntOrZR(context, op.Rn);
  326. if (op.RegisterSize == RegisterSize.Int32)
  327. {
  328. res = context.SignExtend32(OperandType.I64, res);
  329. }
  330. res = EmitFPConvert(context, res, op.Size, signed: true);
  331. res = EmitI2fFBitsMul(context, res, op.FBits);
  332. context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
  333. }
  334. public static void Scvtf_S(ArmEmitterContext context)
  335. {
  336. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  337. int sizeF = op.Size & 1;
  338. if (Optimizations.UseSse2 && sizeF == 0)
  339. {
  340. EmitSse2Scvtf(context, scalar: true);
  341. }
  342. else
  343. {
  344. Operand res = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2);
  345. res = EmitFPConvert(context, res, op.Size, signed: true);
  346. context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
  347. }
  348. }
  349. public static void Scvtf_V(ArmEmitterContext context)
  350. {
  351. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  352. int sizeF = op.Size & 1;
  353. if (Optimizations.UseSse2 && sizeF == 0)
  354. {
  355. EmitSse2Scvtf(context, scalar: false);
  356. }
  357. else
  358. {
  359. EmitVectorCvtf(context, signed: true);
  360. }
  361. }
  362. public static void Scvtf_V_Fixed(ArmEmitterContext context)
  363. {
  364. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  365. // sizeF == ((OpCodeSimdShImm64)op).Size - 2
  366. int sizeF = op.Size & 1;
  367. if (Optimizations.UseSse2 && sizeF == 0)
  368. {
  369. EmitSse2Scvtf(context, scalar: false);
  370. }
  371. else
  372. {
  373. EmitVectorCvtf(context, signed: true);
  374. }
  375. }
  376. public static void Ucvtf_Gp(ArmEmitterContext context)
  377. {
  378. OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
  379. Operand res = GetIntOrZR(context, op.Rn);
  380. res = EmitFPConvert(context, res, op.Size, signed: false);
  381. context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
  382. }
  383. public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
  384. {
  385. OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
  386. Operand res = GetIntOrZR(context, op.Rn);
  387. res = EmitFPConvert(context, res, op.Size, signed: false);
  388. res = EmitI2fFBitsMul(context, res, op.FBits);
  389. context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
  390. }
  391. public static void Ucvtf_S(ArmEmitterContext context)
  392. {
  393. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  394. int sizeF = op.Size & 1;
  395. if (Optimizations.UseSse2 && sizeF == 0)
  396. {
  397. EmitSse2Ucvtf(context, scalar: true);
  398. }
  399. else
  400. {
  401. Operand ne = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2);
  402. Operand res = EmitFPConvert(context, ne, sizeF, signed: false);
  403. context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
  404. }
  405. }
  406. public static void Ucvtf_V(ArmEmitterContext context)
  407. {
  408. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  409. int sizeF = op.Size & 1;
  410. if (Optimizations.UseSse2 && sizeF == 0)
  411. {
  412. EmitSse2Ucvtf(context, scalar: false);
  413. }
  414. else
  415. {
  416. EmitVectorCvtf(context, signed: false);
  417. }
  418. }
  419. public static void Ucvtf_V_Fixed(ArmEmitterContext context)
  420. {
  421. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  422. // sizeF == ((OpCodeSimdShImm)op).Size - 2
  423. int sizeF = op.Size & 1;
  424. if (Optimizations.UseSse2 && sizeF == 0)
  425. {
  426. EmitSse2Ucvtf(context, scalar: false);
  427. }
  428. else
  429. {
  430. EmitVectorCvtf(context, signed: false);
  431. }
  432. }
        /// <summary>
        /// Software fallback for FCVTN{S,U}: converts each FP element to a saturated
        /// signed/unsigned integer, rounding to nearest with ties to even first.
        /// </summary>
        /// <param name="signed">True for FCVTNS, false for FCVTNU.</param>
        /// <param name="scalar">True to convert only element 0.</param>
        private static void EmitFcvtn(ArmEmitterContext context, bool signed, bool scalar)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            Operand n = GetVec(op.Rn);

            int sizeF = op.Size & 1;
            int sizeI = sizeF + 2; // Element size in log2 bytes (2 = 32-bit, 3 = 64-bit).

            OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

            int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = context.VectorExtract(type, n, index);

                // Round to nearest (ties to even) before the truncating, saturating conversion.
                Operand e = EmitRoundMathCall(context, MidpointRounding.ToEven, ne);

                if (sizeF == 0)
                {
                    Delegate dlg = signed
                        ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
                        : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32);

                    e = context.Call(dlg, e);

                    e = context.ZeroExtend32(OperandType.I64, e);
                }
                else /* if (sizeF == 1) */
                {
                    Delegate dlg = signed
                        ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64)
                        : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);

                    e = context.Call(dlg, e);
                }

                res = EmitVectorInsert(context, res, e, index, sizeI);
            }

            context.Copy(GetVec(op.Rd), res);
        }
        /// <summary>
        /// Software fallback for FCVTZ{S,U} (plain and fixed-point): multiplies each
        /// FP element by 2^fBits, then converts to a saturated signed/unsigned
        /// integer with truncation toward zero.
        /// </summary>
        /// <param name="signed">True for FCVTZS, false for FCVTZU.</param>
        /// <param name="scalar">True to convert only element 0.</param>
        private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            Operand n = GetVec(op.Rn);

            int sizeF = op.Size & 1;
            int sizeI = sizeF + 2; // Element size in log2 bytes (2 = 32-bit, 3 = 64-bit).

            OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

            // 0 for the non-fixed-point forms (opcode is not OpCodeSimdShImm).
            int fBits = GetFBits(context);

            int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = context.VectorExtract(type, n, index);

                // Scale by 2^fBits to produce the fixed-point integer value.
                Operand e = EmitF2iFBitsMul(context, ne, fBits);

                if (sizeF == 0)
                {
                    Delegate dlg = signed
                        ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
                        : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32);

                    e = context.Call(dlg, e);

                    e = context.ZeroExtend32(OperandType.I64, e);
                }
                else /* if (sizeF == 1) */
                {
                    Delegate dlg = signed
                        ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64)
                        : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);

                    e = context.Call(dlg, e);
                }

                res = EmitVectorInsert(context, res, e, index, sizeI);
            }

            context.Copy(GetVec(op.Rd), res);
        }
  498. private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit)
  499. {
  500. EmitFcvt___Gp(context, emit, signed: true);
  501. }
  502. private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit)
  503. {
  504. EmitFcvt___Gp(context, emit, signed: false);
  505. }
  506. private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed)
  507. {
  508. OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
  509. OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
  510. Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
  511. Operand res = signed
  512. ? EmitScalarFcvts(context, emit(ne), 0)
  513. : EmitScalarFcvtu(context, emit(ne), 0);
  514. SetIntOrZR(context, op.Rd, res);
  515. }
  516. private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context)
  517. {
  518. EmitFcvtz__Gp_Fixed(context, signed: true);
  519. }
  520. private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context)
  521. {
  522. EmitFcvtz__Gp_Fixed(context, signed: false);
  523. }
  524. private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed)
  525. {
  526. OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
  527. OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
  528. Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
  529. Operand res = signed
  530. ? EmitScalarFcvts(context, ne, op.FBits)
  531. : EmitScalarFcvtu(context, ne, op.FBits);
  532. SetIntOrZR(context, op.Rd, res);
  533. }
        /// <summary>
        /// Software fallback for vector SCVTF/UCVTF: converts each integer element
        /// to FP, then divides by 2^fBits for the fixed-point forms.
        /// </summary>
        private static void EmitVectorCvtf(ArmEmitterContext context, bool signed)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;
            int sizeI = sizeF + 2; // Element size in log2 bytes (2 = 32-bit, 3 = 64-bit).

            // 0 for the non-fixed-point forms (opcode is not OpCodeSimdShImm).
            int fBits = GetFBits(context);

            int elems = op.GetBytesCount() >> sizeI;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI);

                Operand e = EmitFPConvert(context, ne, sizeF, signed);

                // Place the binary point: divide by 2^fBits (no-op when fBits == 0).
                e = EmitI2fFBitsMul(context, e, fBits);

                res = context.VectorInsert(res, e, index);
            }

            context.Copy(GetVec(op.Rd), res);
        }
  551. private static int GetFBits(ArmEmitterContext context)
  552. {
  553. if (context.CurrOp is OpCodeSimdShImm op)
  554. {
  555. return GetImmShr(op);
  556. }
  557. return 0;
  558. }
  559. private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed)
  560. {
  561. Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
  562. Debug.Assert((uint)size < 2);
  563. OperandType type = size == 0 ? OperandType.FP32 : OperandType.FP64;
  564. if (signed)
  565. {
  566. return context.ConvertToFP(type, value);
  567. }
  568. else
  569. {
  570. return context.ConvertToFPUI(type, value);
  571. }
  572. }
  573. private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits)
  574. {
  575. Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
  576. value = EmitF2iFBitsMul(context, value, fBits);
  577. if (context.CurrOp.RegisterSize == RegisterSize.Int32)
  578. {
  579. Delegate dlg = value.Type == OperandType.FP32
  580. ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
  581. : (Delegate)new _S32_F64(SoftFallback.SatF64ToS32);
  582. return context.Call(dlg, value);
  583. }
  584. else
  585. {
  586. Delegate dlg = value.Type == OperandType.FP32
  587. ? (Delegate)new _S64_F32(SoftFallback.SatF32ToS64)
  588. : (Delegate)new _S64_F64(SoftFallback.SatF64ToS64);
  589. return context.Call(dlg, value);
  590. }
  591. }
  592. private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits)
  593. {
  594. Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
  595. value = EmitF2iFBitsMul(context, value, fBits);
  596. if (context.CurrOp.RegisterSize == RegisterSize.Int32)
  597. {
  598. Delegate dlg = value.Type == OperandType.FP32
  599. ? (Delegate)new _U32_F32(SoftFallback.SatF32ToU32)
  600. : (Delegate)new _U32_F64(SoftFallback.SatF64ToU32);
  601. return context.Call(dlg, value);
  602. }
  603. else
  604. {
  605. Delegate dlg = value.Type == OperandType.FP32
  606. ? (Delegate)new _U64_F32(SoftFallback.SatF32ToU64)
  607. : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);
  608. return context.Call(dlg, value);
  609. }
  610. }
  611. private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
  612. {
  613. Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
  614. if (fBits == 0)
  615. {
  616. return value;
  617. }
  618. if (value.Type == OperandType.FP32)
  619. {
  620. return context.Multiply(value, ConstF(MathF.Pow(2f, fBits)));
  621. }
  622. else /* if (value.Type == OperandType.FP64) */
  623. {
  624. return context.Multiply(value, ConstF(Math.Pow(2d, fBits)));
  625. }
  626. }
  627. private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits)
  628. {
  629. Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
  630. if (fBits == 0)
  631. {
  632. return value;
  633. }
  634. if (value.Type == OperandType.FP32)
  635. {
  636. return context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits)));
  637. }
  638. else /* if (value.Type == OperandType.FP64) */
  639. {
  640. return context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits)));
  641. }
  642. }
        /// <summary>
        /// SSE4.1 fast path for FP -> signed integer conversions (FCVT*S family),
        /// with saturation and the given rounding mode. Handles the fixed-point
        /// (shift-immediate) forms by pre-multiplying with 2^fBits.
        /// </summary>
        private static void EmitSse41Fcvts(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetVec(op.Rn);

            // sizeF == ((OpCodeSimdShImm64)op).Size - 2
            int sizeF = op.Size & 1;

            if (sizeF == 0)
            {
                // NaN lanes compare unordered against themselves; ANDing with the
                // ordered mask zeroes them so they convert to 0 (ARM semantics).
                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));

                Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);

                if (op is OpCodeSimdShImm fixedOp)
                {
                    int fBits = GetImmShr(fixedOp);

                    // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
                    int fpScaled = 0x3F800000 + fBits * 0x800000;

                    Operand scale = X86GetAllElements(context, fpScaled);

                    nScaled = context.AddIntrinsic(Intrinsic.X86Mulps, nScaled, scale);
                }

                Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode)));

                Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRnd);

                // CVTPS2DQ yields 0x80000000 for out-of-range lanes; XORing lanes
                // that were >= 2^31 with all-ones flips that to int.MaxValue,
                // giving the saturated positive result.
                Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)

                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, mask, Const((int)CmpCondition.NotLessThan));

                Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);

                if (scalar)
                {
                    res = context.VectorZeroUpper96(res);
                }
                else if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else /* if (sizeF == 1) */
            {
                // Same scheme as above, for doubles.
                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));

                Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);

                if (op is OpCodeSimdShImm fixedOp)
                {
                    int fBits = GetImmShr(fixedOp);

                    // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
                    long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;

                    Operand scale = X86GetAllElements(context, fpScaled);

                    nScaled = context.AddIntrinsic(Intrinsic.X86Mulpd, nScaled, scale);
                }

                Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode)));

                // No packed double -> int64 conversion before AVX-512; convert each
                // half through CVTSD2SI and rebuild the vector.
                Operand high;

                if (!scalar)
                {
                    high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nRnd, nRnd);

                    high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high);
                }
                else
                {
                    high = Const(0L);
                }

                Operand low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRnd);

                Operand nInt = EmitVectorLongCreate(context, low, high);

                // Lanes >= 2^63 overflow to long.MinValue; XOR with the comparison
                // mask saturates them to long.MaxValue.
                Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)

                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, mask, Const((int)CmpCondition.NotLessThan));

                Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);

                if (scalar)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
  711. private static void EmitSse41Fcvtu(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
  712. {
  713. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  714. Operand n = GetVec(op.Rn);
  715. // sizeF == ((OpCodeSimdShImm)op).Size - 2
  716. int sizeF = op.Size & 1;
  717. if (sizeF == 0)
  718. {
  719. Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
  720. Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
  721. if (op is OpCodeSimdShImm fixedOp)
  722. {
  723. int fBits = GetImmShr(fixedOp);
  724. // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
  725. int fpScaled = 0x3F800000 + fBits * 0x800000;
  726. Operand scale = X86GetAllElements(context, fpScaled);
  727. nScaled = context.AddIntrinsic(Intrinsic.X86Mulps, nScaled, scale);
  728. }
  729. Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode)));
  730. Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
  731. Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
  732. Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRndMasked);
  733. Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
  734. Operand res = context.AddIntrinsic(Intrinsic.X86Subps, nRndMasked, mask);
  735. Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, res, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
  736. Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
  737. res = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, resMasked);
  738. Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmpps, resMasked, mask, Const((int)CmpCondition.NotLessThan));
  739. res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
  740. res = context.AddIntrinsic(Intrinsic.X86Paddd, res, nInt);
  741. if (scalar)
  742. {
  743. res = context.VectorZeroUpper96(res);
  744. }
  745. else if (op.RegisterSize == RegisterSize.Simd64)
  746. {
  747. res = context.VectorZeroUpper64(res);
  748. }
  749. context.Copy(GetVec(op.Rd), res);
  750. }
  751. else /* if (sizeF == 1) */
  752. {
  753. Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
  754. Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
  755. if (op is OpCodeSimdShImm fixedOp)
  756. {
  757. int fBits = GetImmShr(fixedOp);
  758. // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
  759. long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
  760. Operand scale = X86GetAllElements(context, fpScaled);
  761. nScaled = context.AddIntrinsic(Intrinsic.X86Mulpd, nScaled, scale);
  762. }
  763. Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode)));
  764. Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
  765. Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
  766. Operand high;
  767. if (!scalar)
  768. {
  769. high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nRndMasked, nRndMasked);
  770. high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high);
  771. }
  772. else
  773. {
  774. high = Const(0L);
  775. }
  776. Operand low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRndMasked);
  777. Operand nInt = EmitVectorLongCreate(context, low, high);
  778. Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
  779. Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, nRndMasked, mask);
  780. Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, res, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
  781. Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
  782. if (!scalar)
  783. {
  784. high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, resMasked, resMasked);
  785. high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high);
  786. }
  787. low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, resMasked);
  788. res = EmitVectorLongCreate(context, low, high);
  789. Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmppd, resMasked, mask, Const((int)CmpCondition.NotLessThan));
  790. res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
  791. res = context.AddIntrinsic(Intrinsic.X86Paddq, res, nInt);
  792. if (scalar)
  793. {
  794. res = context.VectorZeroUpper64(res);
  795. }
  796. context.Copy(GetVec(op.Rd), res);
  797. }
  798. }
  799. private static void EmitSse2Scvtf(ArmEmitterContext context, bool scalar)
  800. {
  801. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  802. Operand n = GetVec(op.Rn);
  803. Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
  804. if (op is OpCodeSimdShImm fixedOp)
  805. {
  806. int fBits = GetImmShr(fixedOp);
  807. // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
  808. int fpScaled = 0x3F800000 - fBits * 0x800000;
  809. Operand scale = X86GetAllElements(context, fpScaled);
  810. res = context.AddIntrinsic(Intrinsic.X86Mulps, res, scale);
  811. }
  812. if (scalar)
  813. {
  814. res = context.VectorZeroUpper96(res);
  815. }
  816. else if (op.RegisterSize == RegisterSize.Simd64)
  817. {
  818. res = context.VectorZeroUpper64(res);
  819. }
  820. context.Copy(GetVec(op.Rd), res);
  821. }
  822. private static void EmitSse2Ucvtf(ArmEmitterContext context, bool scalar)
  823. {
  824. OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  825. Operand n = GetVec(op.Rn);
  826. Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
  827. res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
  828. Operand mask = X86GetAllElements(context, 0x47800000); // 65536.0f (1 << 16)
  829. res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
  830. Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
  831. res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
  832. res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
  833. res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
  834. if (op is OpCodeSimdShImm fixedOp)
  835. {
  836. int fBits = GetImmShr(fixedOp);
  837. // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
  838. int fpScaled = 0x3F800000 - fBits * 0x800000;
  839. Operand scale = X86GetAllElements(context, fpScaled);
  840. res = context.AddIntrinsic(Intrinsic.X86Mulps, res, scale);
  841. }
  842. if (scalar)
  843. {
  844. res = context.VectorZeroUpper96(res);
  845. }
  846. else if (op.RegisterSize == RegisterSize.Simd64)
  847. {
  848. res = context.VectorZeroUpper64(res);
  849. }
  850. context.Copy(GetVec(op.Rd), res);
  851. }
  852. private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size)
  853. {
  854. OperandType type = size == 3 ? OperandType.I64 : OperandType.I32;
  855. return context.VectorExtract(type, GetVec(reg), index);
  856. }
  857. private static Operand EmitVectorLongCreate(ArmEmitterContext context, Operand low, Operand high)
  858. {
  859. Operand vector = context.VectorCreateScalar(low);
  860. vector = context.VectorInsert(vector, high, 1);
  861. return vector;
  862. }
  863. }
  864. }