InstEmitSimdCmp32.cs 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. using ARMeilleure.Decoders;
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.State;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using static ARMeilleure.Instructions.InstEmitHelper;
  7. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  8. using static ARMeilleure.Instructions.InstEmitSimdHelper32;
  9. using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  10. namespace ARMeilleure.Instructions
  11. {
  12. using Func2I = Func<Operand, Operand, Operand>;
  13. static partial class InstEmit32
  14. {
  /// <summary>
  /// Emits the two-operand floating-point "equal" vector compare.
  /// Uses the SSE2/AVX compare emitter when both <c>Optimizations.FastFP</c> and
  /// <c>Optimizations.UseSse2</c> are set, otherwise the soft-float
  /// <c>FPCompareEQFpscr</c> routine.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vceq_V(ArmEmitterContext context)
  {
      bool useFastPath = Optimizations.FastFP && Optimizations.UseSse2;

      if (!useFastPath)
      {
          EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareEQFpscr), zero: false);

          return;
      }

      EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, zero: false);
  }
  /// <summary>
  /// Emits the two-operand integer "equal" vector compare.
  /// Equality does not depend on signedness, so the same comparison is supplied
  /// for both the signed and the unsigned slot; the unsigned (zero-extending)
  /// element path is selected.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vceq_I(ArmEmitterContext context) =>
      EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, zero: false, signed: false);
  /// <summary>
  /// Emits the "equal to zero" vector compare.
  /// When the opcode's <c>F</c> flag is set the floating-point path is used
  /// (SSE2/AVX when <c>FastFP</c> and <c>UseSse2</c> are enabled, soft-float otherwise);
  /// when it is clear the integer path is used with zero-extended elements.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vceq_Z(ArmEmitterContext context)
  {
      OpCode32Simd simdOp = (OpCode32Simd)context.CurrOp;

      // Integer form: handle it first and leave.
      if (!simdOp.F)
      {
          EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, zero: true, signed: false);

          return;
      }

      if (Optimizations.FastFP && Optimizations.UseSse2)
      {
          EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, zero: true);
      }
      else
      {
          EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareEQFpscr), zero: true);
      }
  }
  /// <summary>
  /// Emits the two-operand floating-point "greater than or equal" vector compare.
  /// The vectorized path is taken only when both <c>Optimizations.FastFP</c> and
  /// <c>Optimizations.UseAvx</c> are set; otherwise the soft-float
  /// <c>FPCompareGEFpscr</c> routine is called.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcge_V(ArmEmitterContext context)
  {
      // NOTE(review): gated on UseAvx rather than UseSse2 - presumably this
      // predicate is only available with the AVX compare encoding; confirm.
      bool useFastPath = Optimizations.FastFP && Optimizations.UseAvx;

      if (!useFastPath)
      {
          EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGEFpscr), zero: false);

          return;
      }

      EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, zero: false);
  }
  /// <summary>
  /// Emits the two-operand integer "greater than or equal" vector compare.
  /// The signed comparison is used when the opcode's <c>U</c> flag is clear,
  /// the unsigned one when it is set.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcge_I(ArmEmitterContext context)
  {
      bool isSigned = !((OpCode32SimdReg)context.CurrOp).U;

      EmitCmpOpI32(
          context,
          context.ICompareGreaterOrEqual,
          context.ICompareGreaterOrEqualUI,
          zero: false,
          signed: isSigned);
  }
  /// <summary>
  /// Emits the "greater than or equal to zero" vector compare.
  /// With the opcode's <c>F</c> flag set the floating-point path is used
  /// (vectorized when <c>FastFP</c> and <c>UseAvx</c> are enabled, soft-float otherwise);
  /// with it clear the signed integer comparison is used.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcge_Z(ArmEmitterContext context)
  {
      OpCode32Simd simdOp = (OpCode32Simd)context.CurrOp;

      // Integer form: always a signed comparison against zero.
      if (!simdOp.F)
      {
          EmitCmpOpI32(
              context,
              context.ICompareGreaterOrEqual,
              context.ICompareGreaterOrEqualUI,
              zero: true,
              signed: true);

          return;
      }

      if (Optimizations.FastFP && Optimizations.UseAvx)
      {
          EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, zero: true);
      }
      else
      {
          EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGEFpscr), zero: true);
      }
  }
  /// <summary>
  /// Emits the two-operand floating-point "greater than" vector compare.
  /// The vectorized path is taken only when both <c>Optimizations.FastFP</c> and
  /// <c>Optimizations.UseAvx</c> are set; otherwise the soft-float
  /// <c>FPCompareGTFpscr</c> routine is called.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcgt_V(ArmEmitterContext context)
  {
      bool useFastPath = Optimizations.FastFP && Optimizations.UseAvx;

      if (!useFastPath)
      {
          EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGTFpscr), zero: false);

          return;
      }

      EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, zero: false);
  }
  /// <summary>
  /// Emits the two-operand integer "greater than" vector compare.
  /// The signed comparison is used when the opcode's <c>U</c> flag is clear,
  /// the unsigned one when it is set.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcgt_I(ArmEmitterContext context)
  {
      bool isSigned = !((OpCode32SimdReg)context.CurrOp).U;

      EmitCmpOpI32(
          context,
          context.ICompareGreater,
          context.ICompareGreaterUI,
          zero: false,
          signed: isSigned);
  }
  /// <summary>
  /// Emits the "greater than zero" vector compare.
  /// With the opcode's <c>F</c> flag set the floating-point path is used
  /// (vectorized when <c>FastFP</c> and <c>UseAvx</c> are enabled, soft-float otherwise);
  /// with it clear the signed integer comparison is used.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcgt_Z(ArmEmitterContext context)
  {
      OpCode32Simd simdOp = (OpCode32Simd)context.CurrOp;

      // Integer form: always a signed comparison against zero.
      if (!simdOp.F)
      {
          EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, zero: true, signed: true);

          return;
      }

      if (Optimizations.FastFP && Optimizations.UseAvx)
      {
          EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, zero: true);
      }
      else
      {
          EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGTFpscr), zero: true);
      }
  }
  /// <summary>
  /// Emits the "less than or equal to zero" vector compare.
  /// With the opcode's <c>F</c> flag set the floating-point path is used
  /// (vectorized when <c>FastFP</c> and <c>UseSse2</c> are enabled, soft-float otherwise);
  /// with it clear the signed integer comparison is used.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcle_Z(ArmEmitterContext context)
  {
      OpCode32Simd simdOp = (OpCode32Simd)context.CurrOp;

      // Integer form: always a signed comparison against zero.
      if (!simdOp.F)
      {
          EmitCmpOpI32(
              context,
              context.ICompareLessOrEqual,
              context.ICompareLessOrEqualUI,
              zero: true,
              signed: true);

          return;
      }

      if (Optimizations.FastFP && Optimizations.UseSse2)
      {
          EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThanOrEqual, zero: true);
      }
      else
      {
          EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareLEFpscr), zero: true);
      }
  }
  /// <summary>
  /// Emits the "less than zero" vector compare.
  /// With the opcode's <c>F</c> flag set the floating-point path is used
  /// (vectorized when <c>FastFP</c> and <c>UseSse2</c> are enabled, soft-float otherwise);
  /// with it clear the signed integer comparison is used.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vclt_Z(ArmEmitterContext context)
  {
      OpCode32Simd simdOp = (OpCode32Simd)context.CurrOp;

      // Integer form: always a signed comparison against zero.
      if (!simdOp.F)
      {
          EmitCmpOpI32(context, context.ICompareLess, context.ICompareLessUI, zero: true, signed: true);

          return;
      }

      if (Optimizations.FastFP && Optimizations.UseSse2)
      {
          EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThan, zero: true);
      }
      else
      {
          EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareLTFpscr), zero: true);
      }
  }
  /// <summary>
  /// Emits a floating-point vector compare through a soft-float routine.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  /// <param name="name">Name of the soft-float compare routine to call.</param>
  /// <param name="zero">
  /// When true, each element is compared against a zero constant of matching
  /// precision; when false, the two source operands are compared element-wise.
  /// </param>
  private static void EmitCmpOpF32(ArmEmitterContext context, string name, bool zero)
  {
      if (!zero)
      {
          EmitVectorBinaryOpF32(context, (n, m) => EmitSoftFloatCallDefaultFpscr(context, name, n, m));

          return;
      }

      EmitVectorUnaryOpF32(context, (m) =>
      {
          // The zero constant must match the element's precision.
          Operand rhs;

          if (m.Type == OperandType.FP64)
          {
              rhs = ConstF(0.0d);
          }
          else
          {
              rhs = ConstF(0.0f);
          }

          return EmitSoftFloatCallDefaultFpscr(context, name, m, rhs);
      });
  }
  /// <summary>
  /// Turns a boolean comparison result into an element mask: -1 (all bits set)
  /// when <paramref name="fromBool"/> is true, 0 otherwise.
  /// </summary>
  /// <param name="context">Emitter context used to emit the select.</param>
  /// <param name="fromBool">Operand holding the comparison result.</param>
  /// <param name="baseType">Integer operand type of the mask to produce.</param>
  /// <returns>The operand produced by the conditional select.</returns>
  private static Operand ZerosOrOnes(ArmEmitterContext context, Operand fromBool, OperandType baseType)
  {
      Operand allBitsSet;

      if (baseType == OperandType.I64)
      {
          allBitsSet = Const(-1L);
      }
      else
      {
          allBitsSet = Const(-1);
      }

      Operand allBitsClear = Const(baseType, 0L);

      return context.ConditionalSelect(fromBool, allBitsSet, allBitsClear);
  }
  /// <summary>
  /// Emits an integer vector compare whose per-element result is an all-ones or
  /// all-zeros mask.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  /// <param name="signedOp">Comparison used when <paramref name="signed"/> is true.</param>
  /// <param name="unsignedOp">Comparison used when <paramref name="signed"/> is false.</param>
  /// <param name="zero">
  /// When true, each element is compared against a zero constant; when false,
  /// the two source operands are compared element-wise.
  /// </param>
  /// <param name="signed">
  /// Selects the sign-extending (Sx) element emitter and <paramref name="signedOp"/>
  /// when true, the zero-extending (Zx) emitter and <paramref name="unsignedOp"/> otherwise.
  /// </param>
  private static void EmitCmpOpI32(
      ArmEmitterContext context,
      Func2I signedOp,
      Func2I unsignedOp,
      bool zero,
      bool signed)
  {
      // The comparison is fixed by signedness; only the element emitter differs per case.
      Func2I compare = signed ? signedOp : unsignedOp;

      if (zero)
      {
          Operand CompareWithZero(Operand m)
          {
              OperandType elemType = m.Type;

              // The zero constant is created per invocation and matches the element width.
              Operand zeroConst = (elemType == OperandType.I64) ? Const(0L) : Const(0);

              return ZerosOrOnes(context, compare(m, zeroConst), elemType);
          }

          if (signed)
          {
              EmitVectorUnaryOpSx32(context, CompareWithZero);
          }
          else
          {
              EmitVectorUnaryOpZx32(context, CompareWithZero);
          }

          return;
      }

      Operand CompareElements(Operand n, Operand m)
      {
          return ZerosOrOnes(context, compare(n, m), n.Type);
      }

      if (signed)
      {
          EmitVectorBinaryOpSx32(context, CompareElements);
      }
      else
      {
          EmitVectorBinaryOpZx32(context, CompareElements);
      }
  }
  /// <summary>
  /// Emits the scalar floating-point compare without requesting the signaling
  /// behaviour (<c>signalNaNs</c> is false).
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcmp(ArmEmitterContext context) => EmitVcmpOrVcmpe(context, signalNaNs: false);
  /// <summary>
  /// Emits the scalar floating-point compare with the signaling behaviour
  /// requested (<c>signalNaNs</c> is true).
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcmpe(ArmEmitterContext context) => EmitVcmpOrVcmpe(context, signalNaNs: true);
  /// <summary>
  /// Emits a scalar floating-point compare that writes the FP N, Z, C and V flags.
  /// </summary>
  /// <remarks>
  /// The x86 path is used when <c>Optimizations.FastFP</c> is set together with
  /// <c>UseAvx</c> (signaling form) or <c>UseSse2</c> (quiet form). Otherwise the
  /// soft-float <c>FPCompare</c> routine is called and its result is unpacked by
  /// <see cref="EmitSetFpscrNzcv"/>.
  /// </remarks>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  /// <param name="signalNaNs">
  /// Selects the <c>OrderedS</c> predicate instead of <c>OrderedQ</c> on the x86 path,
  /// and is forwarded to the soft-float routine on the fallback path.
  /// </param>
  private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
  {
      OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

      bool againstZero = (op.Opc & 2) != 0;
      bool isDouble = (op.Size & 1) != 0;

      bool useFastPath = Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2);

      if (!useFastPath)
      {
          // Soft-float fallback: extract the scalars and let the helper compute NZCV.
          OperandType scalarType = isDouble ? OperandType.FP64 : OperandType.FP32;

          Operand lhs = ExtractScalar(context, scalarType, op.Vd);
          Operand rhs;

          if (againstZero)
          {
              rhs = isDouble ? ConstF(0d) : ConstF(0f);
          }
          else
          {
              rhs = ExtractScalar(context, scalarType, op.Vm);
          }

          Operand packedFlags = EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare), lhs, rhs, Const(signalNaNs));

          EmitSetFpscrNzcv(context, packedFlags);

          return;
      }

      CmpCondition orderedCond = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;

      int regShift = isDouble ? 1 : 2;

      Operand m = GetVecA32(op.Vm >> regShift);
      Operand n = GetVecA32(op.Vd >> regShift);

      n = EmitSwapScalar(context, n, op.Vd, isDouble);
      m = againstZero ? context.VectorZero() : EmitSwapScalar(context, m, op.Vm, isDouble);

      Operand lblNaN = Label();
      Operand lblEnd = Label();

      // Single and double precision share one sequence; only the intrinsics differ.
      Intrinsic orderedInst = isDouble ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmpss;
      Intrinsic geInst = isDouble ? Intrinsic.X86Comisdge : Intrinsic.X86Comissge;
      Intrinsic eqInst = isDouble ? Intrinsic.X86Comisdeq : Intrinsic.X86Comisseq;
      Intrinsic ltInst = isDouble ? Intrinsic.X86Comisdlt : Intrinsic.X86Comisslt;

      Operand ordMask = context.AddIntrinsic(orderedInst, n, m, Const((int)orderedCond));

      // The ordered mask is read back as a 64-bit value for doubles, 32-bit for singles.
      Operand isOrdered = isDouble
          ? context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask)
          : context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);

      context.BranchIfFalse(lblNaN, isOrdered);

      Operand cf = context.AddIntrinsicInt(geInst, n, m);
      Operand zf = context.AddIntrinsicInt(eqInst, n, m);
      Operand nf = context.AddIntrinsicInt(ltInst, n, m);

      SetFpFlag(context, FPState.VFlag, Const(0));
      SetFpFlag(context, FPState.CFlag, cf);
      SetFpFlag(context, FPState.ZFlag, zf);
      SetFpFlag(context, FPState.NFlag, nf);

      context.Branch(lblEnd);

      // Unordered result: V and C are set, Z and N are cleared.
      context.MarkLabel(lblNaN);

      SetFpFlag(context, FPState.VFlag, Const(1));
      SetFpFlag(context, FPState.CFlag, Const(1));
      SetFpFlag(context, FPState.ZFlag, Const(0));
      SetFpFlag(context, FPState.NFlag, Const(0));

      context.MarkLabel(lblEnd);
  }
  /// <summary>
  /// Unpacks a 4-bit NZCV value into the individual FP flags.
  /// Bit 0 goes to V, bit 1 to C, bit 2 to Z and bit 3 to N.
  /// </summary>
  /// <param name="context">Emitter context used to emit the bit extraction and flag stores.</param>
  /// <param name="nzcv">Operand holding the packed flags.</param>
  private static void EmitSetFpscrNzcv(ArmEmitterContext context, Operand nzcv)
  {
      // The array index is the bit position of each flag within nzcv.
      FPState[] flagsByBit = { FPState.VFlag, FPState.CFlag, FPState.ZFlag, FPState.NFlag };

      for (int bit = 0; bit < flagsByBit.Length; bit++)
      {
          // Bit 0 needs no shift.
          Operand shifted = (bit == 0) ? nzcv : context.ShiftRightUI(nzcv, Const(bit));
          Operand flagValue = context.BitwiseAnd(shifted, Const(1));

          SetFpFlag(context, flagsByBit[bit], flagValue);
      }
  }
  /// <summary>
  /// Emits a floating-point vector compare with the x86 packed compare
  /// intrinsic, using <paramref name="cond"/> as the immediate predicate.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  /// <param name="cond">Compare predicate passed to the intrinsic as an integer constant.</param>
  /// <param name="zero">
  /// When true, the source is compared against a zero vector; when false,
  /// the two source operands are compared.
  /// </param>
  private static void EmitSse2OrAvxCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
  {
      OpCode32Simd simdOp = (OpCode32Simd)context.CurrOp;

      // The low bit of Size selects the double-precision intrinsic.
      bool isDouble = (simdOp.Size & 1) != 0;

      Intrinsic cmpInst = isDouble ? Intrinsic.X86Cmppd : Intrinsic.X86Cmpps;

      if (!zero)
      {
          EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(cmpInst, n, m, Const((int)cond)));

          return;
      }

      EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(cmpInst, m, context.VectorZero(), Const((int)cond)));
  }
  331. }
  332. }