InstEmitSimdHelper32Arm64.cs 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. using ARMeilleure.Decoders;
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.State;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using System.Diagnostics;
  7. using static ARMeilleure.Instructions.InstEmitHelper;
  8. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  9. using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  10. namespace ARMeilleure.Instructions
  11. {
  12. using Func1I = Func<Operand, Operand>;
  13. using Func2I = Func<Operand, Operand, Operand>;
  14. using Func3I = Func<Operand, Operand, Operand, Operand>;
  15. static class InstEmitSimdHelper32Arm64
  16. {
  17. // Intrinsic Helpers
  // Returns a vector in which the doubleword addressed by originalV sits on the side addressed by targetV.
  // Only bit 0 of each register index is looked at (0 = low doubleword, 1 = high doubleword).
  // When both indices already refer to the same side, the input operand is returned untouched and
  // nothing is emitted; otherwise a single Arm64DupVe (doubleword, 128-bit) intrinsic is emitted.
  public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
  {
      Debug.Assert(input.Type == OperandType.V128);

      bool sourceIsHigh = (originalV & 1) != 0;
      bool targetIsHigh = (targetV & 1) != 0;

      if (sourceIsHigh == targetIsHigh)
      {
          return input;
      }

      // The sides differ, so the element to read is the opposite of the target side:
      // target high -> read element 0 (low to high), target low -> read element 1 (high to low).
      Operand sourceElement = targetIsHigh
          ? Const(OperandType.I32, 0)
          : Const(OperandType.I32, 1);

      Intrinsic dupDWord = Intrinsic.Arm64DupVe | Intrinsic.Arm64VDWord | Intrinsic.Arm64V128;

      return context.AddIntrinsic(dupDWord, input, sourceElement);
  }
  // Emits an Arm64InsVe (doubleword) intrinsic that copies one doubleword of value into target.
  // The same lane index (bit 0 of targetV) is used for both the destination and the source element.
  public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
  {
      Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);

      // One constant operand is shared by the destination-index and source-index arguments.
      Operand lane = Const(targetV & 1);
      Intrinsic insertDWord = Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord;

      return context.AddIntrinsic(insertDWord, target, lane, value, lane);
  }
  // Emits an Arm64InsVe intrinsic that copies element 0 of value into one lane of target.
  // The destination lane is reg & 1 for doubleword elements (doubleWidth) and reg & 3 for word elements.
  public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
  {
      Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);

      int laneMask = doubleWidth ? 1 : 3;
      Intrinsic elementSize = doubleWidth ? Intrinsic.Arm64VDWord : Intrinsic.Arm64VWord;

      // Source element is always index 0 of value; only the destination lane varies.
      return context.AddIntrinsic(
          Intrinsic.Arm64InsVe | elementSize,
          target,
          Const(reg & laneMask),
          value,
          Const(0));
  }
  // Brings the scalar element selected by reg down to element 0.
  // The lane is reg & 1 for doubleword elements (doubleWidth) and reg & 3 for word elements.
  // If the lane is already 0 the input vector is returned as-is and nothing is emitted;
  // otherwise an Arm64DupSe intrinsic of the matching element size is emitted for that lane.
  public static Operand EmitExtractScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
  {
      int lane = doubleWidth ? (reg & 1) : (reg & 3);

      if (lane == 0)
      {
          return target;
      }

      // For doubleword elements the only non-zero lane is 1 (the high half).
      Intrinsic elementSize = doubleWidth ? Intrinsic.Arm64VDWord : Intrinsic.Arm64VWord;

      return context.AddIntrinsic(Intrinsic.Arm64DupSe | elementSize, target, Const(lane));
  }
  71. // Vector Operand Templates
  // Applies vectorFunc to the Qm vector of the current OpCode32Simd and writes the result to Qd.
  // For the non-Q form the source doubleword is first moved to the destination side, and only the
  // destination doubleword of Qd is replaced by the result.
  public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
  {
      OpCode32Simd op = (OpCode32Simd)context.CurrOp;

      Operand source = GetVecA32(op.Qm);
      Operand dest = GetVecA32(op.Qd);

      if (op.Q)
      {
          // Q form: the function result is copied to the destination directly.
          context.Copy(dest, vectorFunc(source));

          return;
      }

      // Register swap: line the source doubleword up with the destination side.
      Operand aligned = EmitMoveDoubleWordToSide(context, source, op.Vm, op.Vd);
      Operand computed = vectorFunc(aligned);

      // Register insert: merge the computed doubleword into the existing destination vector.
      Operand merged = EmitDoubleWordInsert(context, dest, computed, op.Vd);

      context.Copy(dest, merged);
  }
  // Vector unary floating-point operation: tags inst with the element type (double when bit 0 of
  // op.Size is set, float otherwise) and the 128-bit vector flag, then emits it through
  // EmitVectorUnaryOpSimd32.
  public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
  {
      OpCode32Simd op = (OpCode32Simd)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      Intrinsic elementType = isDouble ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat;
      Intrinsic typedInst = inst | elementType | Intrinsic.Arm64V128;

      EmitVectorUnaryOpSimd32(context, m => context.AddIntrinsic(typedInst, m));
  }
  // Applies vectorFunc to the Qn and Qm vectors of the current OpCode32SimdReg and writes the result to Qd.
  // side selects which doubleword side the operands are aligned to for the non-Q form; the default
  // of -1 means "use the destination register index (op.Vd)".
  public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
  {
      OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

      Operand first = GetVecA32(op.Qn);
      Operand second = GetVecA32(op.Qm);
      Operand dest = GetVecA32(op.Qd);

      int workSide = side == -1 ? op.Vd : side;

      if (op.Q)
      {
          // Q form: operands are used as they are and the result is copied to the destination.
          context.Copy(dest, vectorFunc(first, second));

          return;
      }

      // Register swap: move the relevant doubleword of each operand to the working side.
      first = EmitMoveDoubleWordToSide(context, first, op.Vn, workSide);
      second = EmitMoveDoubleWordToSide(context, second, op.Vm, workSide);

      Operand computed = vectorFunc(first, second);

      // If the operation was carried out on a side other than the destination's, move the result over.
      if (workSide != op.Vd)
      {
          computed = EmitMoveDoubleWordToSide(context, computed, workSide, op.Vd);
      }

      // Register insert: merge the computed doubleword into the existing destination vector.
      context.Copy(dest, EmitDoubleWordInsert(context, dest, computed, op.Vd));
  }
  // Vector binary floating-point operation: tags inst with the element type (double when bit 0 of
  // op.Size is set, float otherwise) and the 128-bit vector flag, then emits it through
  // EmitVectorBinaryOpSimd32.
  public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
  {
      OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      Intrinsic elementType = isDouble ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat;
      Intrinsic typedInst = inst | elementType | Intrinsic.Arm64V128;

      EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(typedInst, n, m));
  }
  // Applies vectorFunc to (Qd, Qn, Qm) of the current OpCode32SimdReg and writes the result to Qd.
  // For the non-Q form, n and m are moved to the destination side first (d is passed unmodified),
  // and only the destination doubleword of Qd is replaced by the result.
  public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
  {
      OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

      Operand first = GetVecA32(op.Qn);
      Operand second = GetVecA32(op.Qm);
      Operand dest = GetVecA32(op.Qd);

      if (op.Q)
      {
          // Q form: operands are used as they are and the result is copied to the destination.
          context.Copy(dest, vectorFunc(dest, first, second));

          return;
      }

      // Register swap: move the relevant doubleword of each source to the destination side.
      first = EmitMoveDoubleWordToSide(context, first, op.Vn, op.Vd);
      second = EmitMoveDoubleWordToSide(context, second, op.Vm, op.Vd);

      Operand computed = vectorFunc(dest, first, second);

      // Register insert: merge the computed doubleword into the existing destination vector.
      context.Copy(dest, EmitDoubleWordInsert(context, dest, computed, op.Vd));
  }
  // Vector ternary floating-point operation: tags inst with the element type (double when bit 0 of
  // op.Size is set, float otherwise) and the 128-bit vector flag, then emits it through
  // EmitVectorTernaryOpSimd32 with the operands in (d, n, m) order.
  public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
  {
      OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      Intrinsic elementType = isDouble ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat;
      Intrinsic typedInst = inst | elementType | Intrinsic.Arm64V128;

      EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(typedInst, d, n, m));
  }
  // Applies scalarFunc to the scalar register Vm of the current OpCode32SimdS and stores the
  // result in scalar register Vd.
  // The containing vector is selected with (index >> 1) for double-size operands and (index >> 2)
  // otherwise; the lane inside that vector is handled by EmitExtractScalar / EmitScalarInsert.
  public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
  {
      OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      int vectorShift = isDouble ? 1 : 2;

      Operand sourceVec = GetVecA32(op.Vm >> vectorShift);
      Operand destVec = GetVecA32(op.Vd >> vectorShift);

      // Bring the source scalar to element 0, run the operation, then put element 0 of the
      // result back into the destination lane.
      Operand scalar = EmitExtractScalar(context, sourceVec, op.Vm, isDouble);
      Operand computed = scalarFunc(scalar);
      Operand merged = EmitScalarInsert(context, destVec, computed, op.Vd, isDouble);

      context.Copy(destVec, merged);
  }
  // Scalar unary floating-point operation on the current OpCode32SimdS.
  // inst is tagged with the element type (double when bit 0 of op.Size is set, float otherwise)
  // and the 128-bit vector flag, then emitted through EmitScalarUnaryOpSimd32.
  // Passing inst == 0 requests a pass-through: the extracted source scalar is written to the
  // destination without emitting any intrinsic.
  public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
  {
      OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

      // Decide on the pass-through case before the element-type and vector-size flags are merged
      // in. The previous code tested the merged value, which can only be zero if every OR-ed flag
      // is itself zero, so the pass-through branch was effectively unreachable.
      bool passThrough = inst == 0;

      inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;

      EmitScalarUnaryOpSimd32(context, (m) => passThrough ? m : context.AddIntrinsic(inst, m));
  }
  // Applies scalarFunc to the scalar registers Vn and Vm of the current OpCode32SimdRegS and
  // stores the result in scalar register Vd.
  // The containing vector is selected with (index >> 1) for double-size operands and (index >> 2)
  // otherwise; the lane inside that vector is handled by EmitExtractScalar / EmitScalarInsert.
  public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
  {
      OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      int vectorShift = isDouble ? 1 : 2;

      Operand firstVec = GetVecA32(op.Vn >> vectorShift);
      Operand secondVec = GetVecA32(op.Vm >> vectorShift);
      Operand destVec = GetVecA32(op.Vd >> vectorShift);

      // Bring both source scalars to element 0 (n first, then m).
      Operand first = EmitExtractScalar(context, firstVec, op.Vn, isDouble);
      Operand second = EmitExtractScalar(context, secondVec, op.Vm, isDouble);

      Operand computed = scalarFunc(first, second);

      // Put element 0 of the result back into the destination lane.
      Operand merged = EmitScalarInsert(context, destVec, computed, op.Vd, isDouble);

      context.Copy(destVec, merged);
  }
  // Scalar binary floating-point operation: tags inst with the element type (double when bit 0 of
  // op.Size is set, float otherwise) and the 128-bit vector flag, then emits it through
  // EmitScalarBinaryOpSimd32.
  public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
  {
      OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      Intrinsic elementType = isDouble ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat;
      Intrinsic typedInst = inst | elementType | Intrinsic.Arm64V128;

      EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(typedInst, n, m));
  }
  // Applies scalarFunc to the scalar registers (Vd, Vn, Vm) of the current OpCode32SimdRegS and
  // stores the result back in scalar register Vd.
  // The containing vector is selected with (index >> 1) for double-size operands and (index >> 2)
  // otherwise; the lane inside that vector is handled by EmitExtractScalar / EmitScalarInsert.
  public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
  {
      OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      int vectorShift = isDouble ? 1 : 2;

      Operand firstVec = GetVecA32(op.Vn >> vectorShift);
      Operand secondVec = GetVecA32(op.Vm >> vectorShift);
      Operand destVec = GetVecA32(op.Vd >> vectorShift);

      // Bring all three scalars to element 0 (n, then m, then the current destination value).
      Operand first = EmitExtractScalar(context, firstVec, op.Vn, isDouble);
      Operand second = EmitExtractScalar(context, secondVec, op.Vm, isDouble);
      Operand accumulator = EmitExtractScalar(context, destVec, op.Vd, isDouble);

      Operand computed = scalarFunc(accumulator, first, second);

      // The insert targets the full destination vector, not the extracted scalar.
      Operand merged = EmitScalarInsert(context, destVec, computed, op.Vd, isDouble);

      context.Copy(destVec, merged);
  }
  // Scalar ternary floating-point operation: tags inst with the element type (double when bit 0 of
  // op.Size is set, float otherwise) and the 128-bit vector flag, then emits it through
  // EmitScalarTernaryOpSimd32 with the operands in (d, n, m) order.
  public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
  {
      OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      Intrinsic elementType = isDouble ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat;
      Intrinsic typedInst = inst | elementType | Intrinsic.Arm64V128;

      EmitScalarTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(typedInst, d, n, m));
  }
  214. // Pairwise
  // Pairwise floating-point vector operation.
  // inst32 is tagged with the 64-bit vector and float element flags and emitted through
  // EmitVectorBinaryOpSimd32 with side 0, i.e. for the non-Q form both operands are aligned to
  // the low doubleword before the intrinsic runs and the result is then moved to the destination side.
  public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
  {
      // No opcode local is needed here: EmitVectorBinaryOpSimd32 performs the OpCode32SimdReg cast
      // itself as its first statement, so the type check still happens before anything is emitted.
      inst32 |= Intrinsic.Arm64V64 | Intrinsic.Arm64VFloat;

      EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst32, n, m), 0);
  }
  // Emits a scalar floating-point compare for the current OpCode32SimdS and stores the outcome
  // in the FP status flags.
  // signalNaNs selects Arm64FcmpeS instead of Arm64FcmpS. When bit 1 of op.Opc is set the second
  // operand is the constant 0 rather than register Vm. The first operand is always register Vd.
  public static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
  {
      OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

      bool compareAgainstZero = (op.Opc & 2) != 0;
      bool isDouble = (op.Size & 1) != 0;

      Intrinsic compare = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
      Intrinsic elementType = isDouble ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat;
      Intrinsic typedCompare = compare | elementType | Intrinsic.Arm64V128;

      // Containing vector: index >> 1 for double-size operands, index >> 2 otherwise.
      int vectorShift = isDouble ? 1 : 2;

      Operand leftVec = GetVecA32(op.Vd >> vectorShift);
      Operand rightVec = GetVecA32(op.Vm >> vectorShift);

      Operand left = EmitExtractScalar(context, leftVec, op.Vd, isDouble);
      Operand right;

      if (compareAgainstZero)
      {
          right = Const(0);
      }
      else
      {
          right = EmitExtractScalar(context, rightVec, op.Vm, isDouble);
      }

      Operand nzcv = context.AddIntrinsicInt(typedCompare, left, right);
      Operand bitMask = Const(1);

      // Each flag is a single bit of the compare result: V = bit 28, C = bit 29, Z = bit 30, N = bit 31.
      void StoreFlag(FPState flag, int bit)
      {
          SetFpFlag(context, flag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(bit)), bitMask));
      }

      StoreFlag(FPState.VFlag, 28);
      StoreFlag(FPState.CFlag, 29);
      StoreFlag(FPState.ZFlag, 30);
      StoreFlag(FPState.NFlag, 31);
  }
  // Emits a vector floating-point comparison for the current OpCode32Simd.
  // zero == true  : compare Qm against zero (unary form); Equal, GreaterThan, GreaterThanOrEqual,
  //                 LessThan and LessThanOrEqual are accepted.
  // zero == false : compare Qn against Qm (binary form); only Equal, GreaterThan and
  //                 GreaterThanOrEqual are accepted.
  // Any other combination throws InvalidOperationException.
  public static void EmitCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
  {
      OpCode32Simd op = (OpCode32Simd)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;

      Intrinsic compare = (zero, cond) switch
      {
          (true, CmpCondition.Equal) => Intrinsic.Arm64FcmeqVz,
          (true, CmpCondition.GreaterThan) => Intrinsic.Arm64FcmgtVz,
          (true, CmpCondition.GreaterThanOrEqual) => Intrinsic.Arm64FcmgeVz,
          (true, CmpCondition.LessThan) => Intrinsic.Arm64FcmltVz,
          (true, CmpCondition.LessThanOrEqual) => Intrinsic.Arm64FcmleVz,
          (false, CmpCondition.Equal) => Intrinsic.Arm64FcmeqV,
          (false, CmpCondition.GreaterThan) => Intrinsic.Arm64FcmgtV,
          (false, CmpCondition.GreaterThanOrEqual) => Intrinsic.Arm64FcmgeV,
          _ => throw new InvalidOperationException()
      };

      Intrinsic elementType = isDouble ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat;
      Intrinsic typedCompare = compare | elementType | Intrinsic.Arm64V128;

      if (zero)
      {
          EmitVectorUnaryOpSimd32(context, m => context.AddIntrinsic(typedCompare, m));

          return;
      }

      EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(typedCompare, n, m));
  }
  282. }
  283. }