// InstEmitSimdCmp32.cs
  1. using ARMeilleure.Decoders;
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.State;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using static ARMeilleure.Instructions.InstEmitHelper;
  7. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  8. using static ARMeilleure.Instructions.InstEmitSimdHelper32;
  9. using static ARMeilleure.IntermediateRepresentation.OperandHelper;
  10. namespace ARMeilleure.Instructions
  11. {
  12. using Func2I = Func<Operand, Operand, Operand>;
  13. static partial class InstEmit32
  14. {
  /// <summary>
  /// Emits a floating-point "equal" compare between two vector operands.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vceq_V(ArmEmitterContext context)
  {
      bool useSse2 = Optimizations.FastFP && Optimizations.UseSse2;

      if (!useSse2)
      {
          // Soft-float fallback; the trailing false selects the two-operand form.
          EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false);

          return;
      }

      EmitSse2CmpOpF32(context, CmpCondition.Equal, false);
  }
  /// <summary>
  /// Emits an integer "equal" compare between two vector operands.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vceq_I(ArmEmitterContext context)
  {
      // The same comparison is passed for both the signed and the unsigned slot.
      EmitCmpOpI32(
          context,
          context.ICompareEqual,
          context.ICompareEqual,
          zero: false,
          signed: false);
  }
  /// <summary>
  /// Emits an "equal to zero" compare, choosing the floating-point or the
  /// integer emitter according to the F field of the current opcode.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vceq_Z(ArmEmitterContext context)
  {
      OpCode32Simd op = (OpCode32Simd)context.CurrOp;

      if (!op.F)
      {
          // Integer form: unsigned-extension path, compared against zero.
          EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, true, false);

          return;
      }

      if (Optimizations.FastFP && Optimizations.UseSse2)
      {
          EmitSse2CmpOpF32(context, CmpCondition.Equal, true);
      }
      else
      {
          EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true);
      }
  }
  /// <summary>
  /// Emits a floating-point "greater than or equal" compare between two vector operands.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcge_V(ArmEmitterContext context)
  {
      bool useSse2 = Optimizations.FastFP && Optimizations.UseSse2;

      if (!useSse2)
      {
          // Soft-float fallback; the trailing false selects the two-operand form.
          EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false);

          return;
      }

      EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
  }
  /// <summary>
  /// Emits an integer "greater than or equal" compare between two vector operands.
  /// The signed comparison is used when the opcode's U field is false.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcge_I(ArmEmitterContext context)
  {
      bool isSigned = !((OpCode32SimdReg)context.CurrOp).U;

      EmitCmpOpI32(
          context,
          context.ICompareGreaterOrEqual,
          context.ICompareGreaterOrEqualUI,
          zero: false,
          signed: isSigned);
  }
  /// <summary>
  /// Emits a "greater than or equal to zero" compare, choosing the floating-point
  /// or the signed integer emitter according to the F field of the current opcode.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcge_Z(ArmEmitterContext context)
  {
      OpCode32Simd op = (OpCode32Simd)context.CurrOp;

      if (!op.F)
      {
          // Integer form: signed compare against zero.
          EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, true, true);

          return;
      }

      if (Optimizations.FastFP && Optimizations.UseSse2)
      {
          EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
      }
      else
      {
          EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true);
      }
  }
  /// <summary>
  /// Emits a floating-point "greater than" compare between two vector operands.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcgt_V(ArmEmitterContext context)
  {
      bool useSse2 = Optimizations.FastFP && Optimizations.UseSse2;

      if (!useSse2)
      {
          // Soft-float fallback; the trailing false selects the two-operand form.
          EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false);

          return;
      }

      EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, false);
  }
  /// <summary>
  /// Emits an integer "greater than" compare between two vector operands.
  /// The signed comparison is used when the opcode's U field is false.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcgt_I(ArmEmitterContext context)
  {
      bool isSigned = !((OpCode32SimdReg)context.CurrOp).U;

      EmitCmpOpI32(
          context,
          context.ICompareGreater,
          context.ICompareGreaterUI,
          zero: false,
          signed: isSigned);
  }
  /// <summary>
  /// Emits a "greater than zero" compare, choosing the floating-point or the
  /// signed integer emitter according to the F field of the current opcode.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcgt_Z(ArmEmitterContext context)
  {
      OpCode32Simd op = (OpCode32Simd)context.CurrOp;

      if (!op.F)
      {
          // Integer form: signed compare against zero.
          EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, true, true);

          return;
      }

      if (Optimizations.FastFP && Optimizations.UseSse2)
      {
          EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, true);
      }
      else
      {
          EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true);
      }
  }
  /// <summary>
  /// Emits a "less than or equal to zero" compare, choosing the floating-point
  /// or the signed integer emitter according to the F field of the current opcode.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcle_Z(ArmEmitterContext context)
  {
      OpCode32Simd op = (OpCode32Simd)context.CurrOp;

      if (!op.F)
      {
          // Integer form: signed compare against zero.
          EmitCmpOpI32(context, context.ICompareLessOrEqual, context.ICompareLessOrEqualUI, true, true);

          return;
      }

      if (Optimizations.FastFP && Optimizations.UseSse2)
      {
          EmitSse2CmpOpF32(context, CmpCondition.LessThanOrEqual, true);
      }
      else
      {
          EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true);
      }
  }
  /// <summary>
  /// Emits a "less than zero" compare, choosing the floating-point or the
  /// signed integer emitter according to the F field of the current opcode.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vclt_Z(ArmEmitterContext context)
  {
      OpCode32Simd op = (OpCode32Simd)context.CurrOp;

      if (!op.F)
      {
          // Integer form: signed compare against zero.
          EmitCmpOpI32(context, context.ICompareLess, context.ICompareLessUI, true, true);

          return;
      }

      if (Optimizations.FastFP && Optimizations.UseSse2)
      {
          EmitSse2CmpOpF32(context, CmpCondition.LessThan, true);
      }
      else
      {
          EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true);
      }
  }
  /// <summary>
  /// Emits a floating-point vector compare through the soft-float delegates.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  /// <param name="f32">Delegate called when the element operand type is not FP64.</param>
  /// <param name="f64">Delegate called when the element operand type is FP64.</param>
  /// <param name="zero">
  /// When true, each element is compared against a constant zero (unary form);
  /// when false, two operands are compared (binary form).
  /// </param>
  private static void EmitCmpOpF32(
      ArmEmitterContext context,
      _F32_F32_F32_Bool f32,
      _F64_F64_F64_Bool f64,
      bool zero)
  {
      // Constant 1 passed as the last argument of every soft-float call; created once and shared.
      Operand one = Const(1);

      if (!zero)
      {
          EmitVectorBinaryOpF32(context, (n, m) =>
              n.Type == OperandType.FP64
                  ? context.Call(f64, n, m, one)
                  : context.Call(f32, n, m, one));

          return;
      }

      EmitVectorUnaryOpF32(context, (m) =>
          m.Type == OperandType.FP64
              ? context.Call(f64, m, ConstF(0.0), one)
              : context.Call(f32, m, ConstF(0.0f), one));
  }
  /// <summary>
  /// Emits a conditional select on <paramref name="fromBool"/> between a -1
  /// constant and a zero constant of <paramref name="baseType"/>.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  /// <param name="fromBool">Operand used as the select condition.</param>
  /// <param name="baseType">Operand type of the result; I64 picks the 64-bit -1 constant.</param>
  /// <returns>The operand produced by the conditional select.</returns>
  private static Operand ZerosOrOnes(ArmEmitterContext context, Operand fromBool, OperandType baseType)
  {
      Operand allOnes;

      if (baseType == OperandType.I64)
      {
          allOnes = Const(-1L);
      }
      else
      {
          allOnes = Const(-1);
      }

      Operand allZeros = Const(baseType, 0L);

      return context.ConditionalSelect(fromBool, allOnes, allZeros);
  }
  /// <summary>
  /// Emits an integer vector compare whose per-element result is turned into
  /// a -1 / 0 value by <c>ZerosOrOnes</c>.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  /// <param name="signedOp">Comparison used when <paramref name="signed"/> is true.</param>
  /// <param name="unsignedOp">Comparison used when <paramref name="signed"/> is false.</param>
  /// <param name="zero">
  /// When true, each element is compared against a zero constant (unary form);
  /// when false, two operands are compared (binary form).
  /// </param>
  /// <param name="signed">Selects the Sx (true) or Zx (false) vector helper and the matching comparison.</param>
  private static void EmitCmpOpI32(
      ArmEmitterContext context,
      Func2I signedOp,
      Func2I unsignedOp,
      bool zero,
      bool signed)
  {
      // Only one of the two comparisons is ever used for a given call.
      Func2I compare = signed ? signedOp : unsignedOp;

      if (!zero)
      {
          if (signed)
          {
              EmitVectorBinaryOpSx32(context, (n, m) => ZerosOrOnes(context, compare(n, m), n.Type));
          }
          else
          {
              EmitVectorBinaryOpZx32(context, (n, m) => ZerosOrOnes(context, compare(n, m), n.Type));
          }

          return;
      }

      if (signed)
      {
          EmitVectorUnaryOpSx32(context, (m) =>
          {
              OperandType elemType = m.Type;

              // The zero constant must have the same width as the element operand.
              Operand zeroConst = (elemType == OperandType.I64) ? Const(0L) : Const(0);

              return ZerosOrOnes(context, compare(m, zeroConst), elemType);
          });
      }
      else
      {
          EmitVectorUnaryOpZx32(context, (m) =>
          {
              OperandType elemType = m.Type;

              // The zero constant must have the same width as the element operand.
              Operand zeroConst = (elemType == OperandType.I64) ? Const(0L) : Const(0);

              return ZerosOrOnes(context, compare(m, zeroConst), elemType);
          });
      }
  }
  /// <summary>
  /// Emits the scalar floating-point compare with <c>signalNaNs</c> set to false.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcmp(ArmEmitterContext context) => EmitVcmpOrVcmpe(context, signalNaNs: false);
  /// <summary>
  /// Emits the scalar floating-point compare with <c>signalNaNs</c> set to true.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  public static void Vcmpe(ArmEmitterContext context) => EmitVcmpOrVcmpe(context, signalNaNs: true);
  /// <summary>
  /// Emits a scalar floating-point compare that writes the FPSCR N, Z, C and V flags.
  /// </summary>
  /// <remarks>
  /// Bit 1 of the opcode's Opc field selects a compare against zero; bit 0 of Size
  /// selects double precision. The x86 intrinsic path is taken when FastFP is enabled
  /// and the host feature is available (AVX when <paramref name="signalNaNs"/> is true,
  /// SSE2 otherwise); every other case calls the soft-float FPCompare routine.
  /// </remarks>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  /// <param name="signalNaNs">
  /// Forwarded to the soft-float compare, and selects OrderedS over OrderedQ on the intrinsic path.
  /// </param>
  private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
  {
      OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

      bool againstZero = (op.Opc & 2) != 0;
      bool isDouble = (op.Size & 1) != 0;

      bool fastPath = Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2);

      if (!fastPath)
      {
          OperandType scalarType = isDouble ? OperandType.FP64 : OperandType.FP32;

          Operand lhs = ExtractScalar(context, scalarType, op.Vd);
          Operand rhs;

          if (!againstZero)
          {
              rhs = ExtractScalar(context, scalarType, op.Vm);
          }
          else if (isDouble)
          {
              rhs = ConstF(0d);
          }
          else
          {
              rhs = ConstF(0f);
          }

          Delegate softCompare;

          if (isDouble)
          {
              softCompare = new _S32_F64_F64_Bool(SoftFloat64.FPCompare);
          }
          else
          {
              softCompare = new _S32_F32_F32_Bool(SoftFloat32.FPCompare);
          }

          // The soft-float routine returns the four flags packed into one value.
          Operand packedFlags = context.Call(softCompare, lhs, rhs, Const(signalNaNs));

          EmitSetFPSCRFlags(context, packedFlags);

          return;
      }

      CmpCondition orderedCond = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;

      // Shift that turns the scalar register index into the index handed to GetVecA32.
      int shift = isDouble ? 1 : 2;

      Operand m = GetVecA32(op.Vm >> shift);
      Operand n = GetVecA32(op.Vd >> shift);

      n = EmitSwapScalar(context, n, op.Vd, isDouble);

      if (againstZero)
      {
          m = context.VectorZero();
      }
      else
      {
          m = EmitSwapScalar(context, m, op.Vm, isDouble);
      }

      Operand lblNaN = Label();
      Operand lblEnd = Label();

      // Pick the single- or double-precision variant of each intrinsic up front.
      Intrinsic cmpInst;
      Intrinsic geInst;
      Intrinsic eqInst;
      Intrinsic ltInst;

      if (isDouble)
      {
          cmpInst = Intrinsic.X86Cmpsd;
          geInst = Intrinsic.X86Comisdge;
          eqInst = Intrinsic.X86Comisdeq;
          ltInst = Intrinsic.X86Comisdlt;
      }
      else
      {
          cmpInst = Intrinsic.X86Cmpss;
          geInst = Intrinsic.X86Comissge;
          eqInst = Intrinsic.X86Comisseq;
          ltInst = Intrinsic.X86Comisslt;
      }

      Operand ordMask = context.AddIntrinsic(cmpInst, n, m, Const((int)orderedCond));

      // The mask is read back as a 64-bit integer for doubles and a 32-bit integer for singles.
      Operand isOrdered = isDouble
          ? context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask)
          : context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);

      context.BranchIfFalse(lblNaN, isOrdered);

      Operand cf = context.AddIntrinsicInt(geInst, n, m);
      Operand zf = context.AddIntrinsicInt(eqInst, n, m);
      Operand nf = context.AddIntrinsicInt(ltInst, n, m);

      EmitSetFPSCRFlags(context, nf, zf, cf, Const(0));

      context.Branch(lblEnd);

      context.MarkLabel(lblNaN);

      // Not-ordered result: packed value 3 sets bits 0 and 1, which the packed overload maps to V and C.
      EmitSetFPSCRFlags(context, Const(3));

      context.MarkLabel(lblEnd);
  }
  /// <summary>
  /// Unpacks a 4-bit flags value and stores each bit into the matching FP flag:
  /// bit 0 to V, bit 1 to C, bit 2 to Z and bit 3 to N.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  /// <param name="nzcv">Operand holding the packed flags.</param>
  private static void EmitSetFPSCRFlags(ArmEmitterContext context, Operand nzcv)
  {
      // Isolates one bit of nzcv; bit 0 needs no shift, so none is emitted for it.
      Operand BitAt(int index)
      {
          Operand shifted = index == 0
              ? nzcv
              : context.ShiftRightUI(nzcv, Const(index));

          return context.BitwiseAnd(shifted, Const(1));
      }

      SetFpFlag(context, FPState.VFlag, BitAt(0));
      SetFpFlag(context, FPState.CFlag, BitAt(1));
      SetFpFlag(context, FPState.ZFlag, BitAt(2));
      SetFpFlag(context, FPState.NFlag, BitAt(3));
  }
  /// <summary>
  /// Stores four separately computed flag operands into the FP flags,
  /// in the order V, C, Z, N.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  /// <param name="n">Value stored to the N flag.</param>
  /// <param name="z">Value stored to the Z flag.</param>
  /// <param name="c">Value stored to the C flag.</param>
  /// <param name="v">Value stored to the V flag.</param>
  private static void EmitSetFPSCRFlags(ArmEmitterContext context, Operand n, Operand z, Operand c, Operand v)
  {
      void Store(FPState flag, Operand value) => SetFpFlag(context, flag, value);

      Store(FPState.VFlag, v);
      Store(FPState.CFlag, c);
      Store(FPState.ZFlag, z);
      Store(FPState.NFlag, n);
  }
  /// <summary>
  /// Emits a floating-point vector compare using the x86 packed compare intrinsics.
  /// </summary>
  /// <param name="context">Emitter context for the instruction being translated.</param>
  /// <param name="cond">Compare predicate, passed to the intrinsic as an integer constant.</param>
  /// <param name="zero">
  /// When true, the operand is compared against a zero vector (unary form);
  /// when false, two operands are compared (binary form).
  /// </param>
  private static void EmitSse2CmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
  {
      OpCode32Simd op = (OpCode32Simd)context.CurrOp;

      // Bit 0 of Size selects the double-precision intrinsic.
      bool isDouble = (op.Size & 1) != 0;

      Intrinsic cmpInst = isDouble ? Intrinsic.X86Cmppd : Intrinsic.X86Cmpps;

      if (!zero)
      {
          EmitVectorBinaryOpSimd32(context, (n, m) =>
              context.AddIntrinsic(cmpInst, n, m, Const((int)cond)));

          return;
      }

      EmitVectorUnaryOpSimd32(context, (m) =>
          context.AddIntrinsic(cmpInst, m, context.VectorZero(), Const((int)cond)));
  }
  352. }
  353. }