// InstEmitSimdHelper.cs (55 KB)
  1. using ARMeilleure.Decoders;
  2. using ARMeilleure.IntermediateRepresentation;
  3. using ARMeilleure.State;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using System.Diagnostics;
  7. using System.Reflection;
  8. using static ARMeilleure.Instructions.InstEmitHelper;
  9. using static ARMeilleure.IntermediateRepresentation.OperandHelper;
  10. namespace ARMeilleure.Instructions
  11. {
  12. using Func1I = Func<Operand, Operand>;
  13. using Func2I = Func<Operand, Operand, Operand>;
  14. using Func3I = Func<Operand, Operand, Operand, Operand>;
  15. static class InstEmitSimdHelper
  16. {
  17. #region "Masks"
  // One 64-bit constant per element size (index 0 = B, 1 = H, 2 = S). Each
  // constant packs eight byte indices (lowest byte first) that select the
  // even-numbered elements of a 16-byte vector.
  // NOTE(review): presumably used as byte-shuffle control masks - confirm against callers.
  public static readonly long[] EvenMasks =
  {
      0x0E0C0A0806040200L, // B
      0x0D0C090805040100L, // H
      0x0B0A090803020100L  // S
  };
  // One 64-bit constant per element size (index 0 = B, 1 = H, 2 = S). Each
  // constant packs eight byte indices (lowest byte first) that select the
  // odd-numbered elements of a 16-byte vector.
  // NOTE(review): presumably used as byte-shuffle control masks - confirm against callers.
  public static readonly long[] OddMasks =
  {
      0x0F0D0B0907050301L, // B
      0x0F0E0B0A07060302L, // H
      0x0F0E0D0C07060504L  // S
  };
  // 64-bit constant whose eight bytes are all 0x80 (128).
  // NOTE(review): presumably the "zero this byte" selector for a byte shuffle - confirm against callers.
  public static readonly long ZeroMask = unchecked((long)0x8080808080808080UL);
  31. #endregion
  32. #region "X86 SSE Intrinsics"
  // PADD intrinsics in b, w, d, q order (indices 0..3).
  public static readonly Intrinsic[] X86PaddInstruction =
  {
      Intrinsic.X86Paddb,
      Intrinsic.X86Paddw,
      Intrinsic.X86Paddd,
      Intrinsic.X86Paddq
  };
  // PCMPEQ intrinsics in b, w, d, q order (indices 0..3).
  public static readonly Intrinsic[] X86PcmpeqInstruction =
  {
      Intrinsic.X86Pcmpeqb,
      Intrinsic.X86Pcmpeqw,
      Intrinsic.X86Pcmpeqd,
      Intrinsic.X86Pcmpeqq
  };
  // PCMPGT intrinsics in b, w, d, q order (indices 0..3).
  public static readonly Intrinsic[] X86PcmpgtInstruction =
  {
      Intrinsic.X86Pcmpgtb,
      Intrinsic.X86Pcmpgtw,
      Intrinsic.X86Pcmpgtd,
      Intrinsic.X86Pcmpgtq
  };
  // PMAXS intrinsics in b, w, d order (indices 0..2); there is no q entry.
  public static readonly Intrinsic[] X86PmaxsInstruction =
  {
      Intrinsic.X86Pmaxsb,
      Intrinsic.X86Pmaxsw,
      Intrinsic.X86Pmaxsd
  };
  // PMAXU intrinsics in b, w, d order (indices 0..2); there is no q entry.
  public static readonly Intrinsic[] X86PmaxuInstruction =
  {
      Intrinsic.X86Pmaxub,
      Intrinsic.X86Pmaxuw,
      Intrinsic.X86Pmaxud
  };
  // PMINS intrinsics in b, w, d order (indices 0..2); there is no q entry.
  public static readonly Intrinsic[] X86PminsInstruction =
  {
      Intrinsic.X86Pminsb,
      Intrinsic.X86Pminsw,
      Intrinsic.X86Pminsd
  };
  // PMINU intrinsics in b, w, d order (indices 0..2); there is no q entry.
  public static readonly Intrinsic[] X86PminuInstruction =
  {
      Intrinsic.X86Pminub,
      Intrinsic.X86Pminuw,
      Intrinsic.X86Pminud
  };
  // PMOVSX intrinsics in bw, wd, dq order (indices 0..2), keyed by the
  // source element size.
  public static readonly Intrinsic[] X86PmovsxInstruction =
  {
      Intrinsic.X86Pmovsxbw,
      Intrinsic.X86Pmovsxwd,
      Intrinsic.X86Pmovsxdq
  };
  // PMOVZX intrinsics in bw, wd, dq order (indices 0..2), keyed by the
  // source element size.
  public static readonly Intrinsic[] X86PmovzxInstruction =
  {
      Intrinsic.X86Pmovzxbw,
      Intrinsic.X86Pmovzxwd,
      Intrinsic.X86Pmovzxdq
  };
  // PSLL intrinsics at indices 1..3 (w, d, q). Index 0 has no byte-sized
  // entry and only holds the enum's zero value as a placeholder.
  public static readonly Intrinsic[] X86PsllInstruction =
  {
      0,
      Intrinsic.X86Psllw,
      Intrinsic.X86Pslld,
      Intrinsic.X86Psllq
  };
  // PSRA intrinsics at indices 1..2 (w, d). Index 0 has no byte-sized entry
  // and only holds the enum's zero value as a placeholder; there is no q entry.
  public static readonly Intrinsic[] X86PsraInstruction =
  {
      0,
      Intrinsic.X86Psraw,
      Intrinsic.X86Psrad
  };
  // PSRL intrinsics at indices 1..3 (w, d, q). Index 0 has no byte-sized
  // entry and only holds the enum's zero value as a placeholder.
  public static readonly Intrinsic[] X86PsrlInstruction =
  {
      0,
      Intrinsic.X86Psrlw,
      Intrinsic.X86Psrld,
      Intrinsic.X86Psrlq
  };
  // PSUB intrinsics in b, w, d, q order (indices 0..3).
  public static readonly Intrinsic[] X86PsubInstruction =
  {
      Intrinsic.X86Psubb,
      Intrinsic.X86Psubw,
      Intrinsic.X86Psubd,
      Intrinsic.X86Psubq
  };
  // PUNPCKH intrinsics in bw, wd, dq, qdq order (indices 0..3).
  public static readonly Intrinsic[] X86PunpckhInstruction =
  {
      Intrinsic.X86Punpckhbw,
      Intrinsic.X86Punpckhwd,
      Intrinsic.X86Punpckhdq,
      Intrinsic.X86Punpckhqdq
  };
  // PUNPCKL intrinsics in bw, wd, dq, qdq order (indices 0..3).
  public static readonly Intrinsic[] X86PunpcklInstruction =
  {
      Intrinsic.X86Punpcklbw,
      Intrinsic.X86Punpcklwd,
      Intrinsic.X86Punpckldq,
      Intrinsic.X86Punpcklqdq
  };
  131. #endregion
  // Returns the left-shift amount of a shift-by-immediate opcode:
  // op.Imm minus (8 << op.Size).
  // NOTE(review): 8 << op.Size is presumably the element width in bits - confirm.
  public static int GetImmShl(OpCodeSimdShImm op)
  {
      int bias = 8 << op.Size;

      return op.Imm - bias;
  }
  // Returns the right-shift amount of a shift-by-immediate opcode:
  // (8 << (op.Size + 1)) minus op.Imm.
  // NOTE(review): 8 << (op.Size + 1) is presumably twice the element width in bits - confirm.
  public static int GetImmShr(OpCodeSimdShImm op)
  {
      int limit = 8 << (op.Size + 1);

      return limit - op.Imm;
  }
  // Builds a vector from a float by forwarding its raw 32-bit pattern to the
  // int overload.
  public static Operand X86GetScalar(ArmEmitterContext context, float value)
  {
      int bits = BitConverter.SingleToInt32Bits(value);

      return X86GetScalar(context, bits);
  }

  // Builds a vector from a double by forwarding its raw 64-bit pattern to the
  // long overload.
  public static Operand X86GetScalar(ArmEmitterContext context, double value)
  {
      long bits = BitConverter.DoubleToInt64Bits(value);

      return X86GetScalar(context, bits);
  }

  // Builds a vector from a 32-bit constant via context.VectorCreateScalar.
  public static Operand X86GetScalar(ArmEmitterContext context, int value)
  {
      Operand constant = Const(value);

      return context.VectorCreateScalar(constant);
  }

  // Builds a vector from a 64-bit constant via context.VectorCreateScalar.
  public static Operand X86GetScalar(ArmEmitterContext context, long value)
  {
      Operand constant = Const(value);

      return context.VectorCreateScalar(constant);
  }
  // Float variant: forwards the raw 32-bit pattern to the int overload.
  public static Operand X86GetAllElements(ArmEmitterContext context, float value)
  {
      int bits = BitConverter.SingleToInt32Bits(value);

      return X86GetAllElements(context, bits);
  }

  // Double variant: forwards the raw 64-bit pattern to the long overload.
  public static Operand X86GetAllElements(ArmEmitterContext context, double value)
  {
      long bits = BitConverter.DoubleToInt64Bits(value);

      return X86GetAllElements(context, bits);
  }

  // Creates a scalar vector from the 32-bit constant, then emits SHUFPS of
  // that vector with itself using an immediate of 0.
  public static Operand X86GetAllElements(ArmEmitterContext context, int value)
  {
      Operand scalar = context.VectorCreateScalar(Const(value));

      return context.AddIntrinsic(Intrinsic.X86Shufps, scalar, scalar, Const(0));
  }

  // Creates a scalar vector from the 64-bit constant, then emits MOVLHPS of
  // that vector with itself.
  public static Operand X86GetAllElements(ArmEmitterContext context, long value)
  {
      Operand scalar = context.VectorCreateScalar(Const(value));

      return context.AddIntrinsic(Intrinsic.X86Movlhps, scalar, scalar);
  }
  // Combines two 64-bit constants into one vector: a scalar vector is created
  // for e0 and another for e1 (in that order), and the two are joined with
  // PUNPCKLQDQ, e0 as the first operand.
  public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0)
  {
      Operand low = context.VectorCreateScalar(Const(e0));
      Operand high = context.VectorCreateScalar(Const(e1));

      Operand combined = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, low, high);

      return combined;
  }
  // Maps an FPRoundingMode to an integer rounding-control value. Every result
  // has bit 3 (value 8) set; the low two bits encode the mode.
  // NOTE(review): presumably the imm8 of the SSE4.1 ROUNDxx instructions,
  // where bit 3 suppresses the precision exception - confirm.
  // Throws ArgumentException for any value outside the four handled modes.
  public static int X86GetRoundControl(FPRoundingMode roundMode)
  {
      int modeBits;

      switch (roundMode)
      {
          case FPRoundingMode.ToNearest:
              modeBits = 0; // even
              break;

          case FPRoundingMode.TowardsMinusInfinity:
              modeBits = 1;
              break;

          case FPRoundingMode.TowardsPlusInfinity:
              modeBits = 2;
              break;

          case FPRoundingMode.TowardsZero:
              modeBits = 3;
              break;

          default:
              throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".");
      }

      return 8 | modeBits;
  }
  // Emits a scalar FP unary operation as one intrinsic applied to Rn.
  // inst64 is used when bit 0 of op.Size is set, inst32 otherwise. The result
  // is passed through VectorZeroUpper64 (64-bit form) or VectorZeroUpper96
  // (32-bit form) before being copied to Rd.
  public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;

      Operand source = GetVec(op.Rn);

      Operand result = context.AddIntrinsic(isDouble ? inst64 : inst32, source);

      result = isDouble
          ? context.VectorZeroUpper64(result)
          : context.VectorZeroUpper96(result);

      context.Copy(GetVec(op.Rd), result);
  }
  // Emits a scalar FP binary operation as one intrinsic applied to Rn and Rm.
  // inst64 is used when bit 0 of op.Size is set, inst32 otherwise. The result
  // is passed through VectorZeroUpper64 (64-bit form) or VectorZeroUpper96
  // (32-bit form) before being copied to Rd.
  public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;

      Operand left = GetVec(op.Rn);
      Operand right = GetVec(op.Rm);

      Operand result = context.AddIntrinsic(isDouble ? inst64 : inst32, left, right);

      result = isDouble
          ? context.VectorZeroUpper64(result)
          : context.VectorZeroUpper96(result);

      context.Copy(GetVec(op.Rd), result);
  }
  // Emits a vector FP unary operation as one intrinsic applied to Rn.
  // inst64 is used when bit 0 of op.Size is set, inst32 otherwise. For a
  // Simd64 register size the result goes through VectorZeroUpper64 before
  // being copied to Rd.
  public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand source = GetVec(op.Rn);

      Intrinsic selected;

      if ((op.Size & 1) != 0)
      {
          selected = inst64;
      }
      else
      {
          selected = inst32;
      }

      Operand result = context.AddIntrinsic(selected, source);

      bool isHalfWidth = op.RegisterSize == RegisterSize.Simd64;

      if (isHalfWidth)
      {
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Emits a vector FP binary operation as one intrinsic applied to Rn and Rm.
  // inst64 is used when bit 0 of op.Size is set, inst32 otherwise. For a
  // Simd64 register size the result goes through VectorZeroUpper64 before
  // being copied to Rd.
  public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand left = GetVec(op.Rn);
      Operand right = GetVec(op.Rm);

      Intrinsic selected;

      if ((op.Size & 1) != 0)
      {
          selected = inst64;
      }
      else
      {
          selected = inst32;
      }

      Operand result = context.AddIntrinsic(selected, left, right);

      bool isHalfWidth = op.RegisterSize == RegisterSize.Simd64;

      if (isHalfWidth)
      {
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Emits a call to a one-argument math routine looked up by name through
  // reflection: MathF.<name>(float) when bit 0 of op.Size is clear,
  // Math.<name>(double) otherwise.
  //
  // context: emitter context holding the current opcode.
  // name:    name of the static method to resolve.
  // n:       operand passed as the single call argument.
  // Returns the operand produced by context.Call.
  // Throws InvalidOperationException when no matching overload exists, so a
  // misspelled name is reported here instead of handing a null MethodInfo to
  // context.Call.
  public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n)
  {
      IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

      bool isSingle = (op.Size & 1) == 0;

      Type mathType = isSingle ? typeof(MathF) : typeof(Math);
      Type argType = isSingle ? typeof(float) : typeof(double);

      MethodInfo info = mathType.GetMethod(name, new Type[] { argType });

      // Type.GetMethod returns null (it does not throw) when nothing matches.
      if (info == null)
      {
          throw new InvalidOperationException(
              $"Method \"{name}\" taking a single {argType.Name} argument was not found on {mathType.FullName}.");
      }

      return context.Call(info, n);
  }
  // Emits a call to Round(value, MidpointRounding): the MathF overload taking
  // a float when bit 0 of op.Size is clear, the Math overload taking a double
  // otherwise. roundMode is passed as an integer constant operand.
  public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
  {
      IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

      const string name = nameof(Math.Round);

      MethodInfo info;

      if ((op.Size & 1) == 0)
      {
          info = typeof(MathF).GetMethod(name, new[] { typeof(float), typeof(MidpointRounding) });
      }
      else
      {
          info = typeof(Math).GetMethod(name, new[] { typeof(double), typeof(MidpointRounding) });
      }

      Operand mode = Const((int)roundMode);

      return context.Call(info, n, mode);
  }
  // Emits a call to a soft-float routine looked up by name through
  // reflection: on SoftFloat32 when bit 0 of op.Size is clear, on SoftFloat64
  // otherwise.
  //
  // context:  emitter context holding the current opcode.
  // name:     name of the method to resolve on the selected type.
  // callArgs: operands forwarded unchanged to context.Call.
  // Returns the operand produced by context.Call.
  // Throws InvalidOperationException when the selected type has no method
  // with that name, so a misspelled name is reported here instead of handing
  // a null MethodInfo to context.Call.
  public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs)
  {
      IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

      Type softFloatType = (op.Size & 1) == 0
          ? typeof(SoftFloat32)
          : typeof(SoftFloat64);

      MethodInfo info = softFloatType.GetMethod(name);

      // Type.GetMethod returns null (it does not throw) when nothing matches.
      if (info == null)
      {
          throw new InvalidOperationException(
              $"Method \"{name}\" was not found on {softFloatType.FullName}.");
      }

      return context.Call(info, callArgs);
  }
  // Scalar FP binary operation by element: emit receives element 0 of Rn and
  // element op.Index of Rm. Its result is inserted at element 0 of a zero
  // vector, which is then copied to Rd.
  public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

      OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;

      Operand left = context.VectorExtract(elemType, GetVec(op.Rn), 0);
      Operand right = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);

      Operand dest = GetVec(op.Rd);
      Operand zero = context.VectorZero();
      Operand value = emit(left, right);

      context.Copy(dest, context.VectorInsert(zero, value, 0));
  }
  // Scalar FP ternary operation by element: emit receives element 0 of Rd,
  // element 0 of Rn and element op.Index of Rm. Its result is inserted at
  // element 0 of a zero vector, which is then copied to Rd.
  public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

      OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;

      Operand acc = context.VectorExtract(elemType, GetVec(op.Rd), 0);
      Operand left = context.VectorExtract(elemType, GetVec(op.Rn), 0);
      Operand right = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);

      Operand dest = GetVec(op.Rd);
      Operand zero = context.VectorZero();
      Operand value = emit(acc, left, right);

      context.Copy(dest, context.VectorInsert(zero, value, 0));
  }
  // Scalar integer unary operation: element 0 of Rn is read with
  // EmitVectorExtractSx, passed to emit, and the result is inserted at
  // element 0 of a zero vector that is copied to Rd.
  public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand source = EmitVectorExtractSx(context, op.Rn, 0, op.Size);

      Operand zero = context.VectorZero();
      Operand result = EmitVectorInsert(context, zero, emit(source), 0, op.Size);

      context.Copy(GetVec(op.Rd), result);
  }
  // Scalar integer binary operation: element 0 of Rn and of Rm are read with
  // EmitVectorExtractSx, passed to emit, and the result is inserted at
  // element 0 of a zero vector that is copied to Rd.
  public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand left = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
      Operand right = EmitVectorExtractSx(context, op.Rm, 0, op.Size);

      Operand zero = context.VectorZero();
      Operand result = EmitVectorInsert(context, zero, emit(left, right), 0, op.Size);

      context.Copy(GetVec(op.Rd), result);
  }
  // Scalar integer unary operation: element 0 of Rn is read with
  // EmitVectorExtractZx, passed to emit, and the result is inserted at
  // element 0 of a zero vector that is copied to Rd.
  public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand source = EmitVectorExtractZx(context, op.Rn, 0, op.Size);

      Operand zero = context.VectorZero();
      Operand result = EmitVectorInsert(context, zero, emit(source), 0, op.Size);

      context.Copy(GetVec(op.Rd), result);
  }
  // Scalar integer binary operation: element 0 of Rn and of Rm are read with
  // EmitVectorExtractZx, passed to emit, and the result is inserted at
  // element 0 of a zero vector that is copied to Rd.
  public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand left = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
      Operand right = EmitVectorExtractZx(context, op.Rm, 0, op.Size);

      Operand zero = context.VectorZero();
      Operand result = EmitVectorInsert(context, zero, emit(left, right), 0, op.Size);

      context.Copy(GetVec(op.Rd), result);
  }
  // Scalar integer ternary operation: element 0 of Rd, Rn and Rm are read
  // with EmitVectorExtractZx and passed to emit in that order. The result is
  // inserted at element 0 of a zero vector that is copied to Rd.
  public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand acc = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
      Operand left = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
      Operand right = EmitVectorExtractZx(context, op.Rm, 0, op.Size);

      Operand zero = context.VectorZero();
      Operand result = EmitVectorInsert(context, zero, emit(acc, left, right), 0, op.Size);

      context.Copy(GetVec(op.Rd), result);
  }
  // Scalar FP unary operation through a callback: emit receives element 0 of
  // Rn (FP64 when bit 0 of op.Size is set, FP32 otherwise). Its result is
  // inserted at element 0 of a zero vector, which is then copied to Rd.
  public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;

      Operand source = context.VectorExtract(elemType, GetVec(op.Rn), 0);

      Operand dest = GetVec(op.Rd);
      Operand zero = context.VectorZero();
      Operand value = emit(source);

      context.Copy(dest, context.VectorInsert(zero, value, 0));
  }
  // Scalar FP binary operation through a callback: emit receives element 0 of
  // Rn and of Rm (FP64 when bit 0 of op.Size is set, FP32 otherwise). Its
  // result is inserted at element 0 of a zero vector, which is then copied to Rd.
  public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;

      Operand left = context.VectorExtract(elemType, GetVec(op.Rn), 0);
      Operand right = context.VectorExtract(elemType, GetVec(op.Rm), 0);

      Operand dest = GetVec(op.Rd);
      Operand zero = context.VectorZero();
      Operand value = emit(left, right);

      context.Copy(dest, context.VectorInsert(zero, value, 0));
  }
  // Scalar FP ternary operation through a callback: emit receives element 0
  // of Ra, Rn and Rm, in that order. Its result is inserted at element 0 of a
  // zero vector, which is then copied to Rd.
  public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;

      Operand addend = context.VectorExtract(elemType, GetVec(op.Ra), 0);
      Operand left = context.VectorExtract(elemType, GetVec(op.Rn), 0);
      Operand right = context.VectorExtract(elemType, GetVec(op.Rm), 0);

      Operand dest = GetVec(op.Rd);
      Operand zero = context.VectorZero();
      Operand value = emit(addend, left, right);

      context.Copy(dest, context.VectorInsert(zero, value, 0));
  }
  // Element-wise FP unary operation through a callback: emit is run on every
  // FP element of Rn and the results are inserted into a vector that starts
  // as zero, so lanes that are never written stay zero. The vector is then
  // copied to Rd.
  public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand result = context.VectorZero();

      int sizeF = op.Size & 1;

      OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

      // Byte count divided by the element size: 4 bytes when sizeF is 0, 8 when it is 1.
      int count = op.GetBytesCount() >> (sizeF + 2);

      for (int i = 0; i < count; i++)
      {
          Operand element = context.VectorExtract(elemType, GetVec(op.Rn), i);

          result = context.VectorInsert(result, emit(element), i);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise FP binary operation through a callback: emit is run on each
  // pair of matching FP elements of Rn and Rm and the results are inserted
  // into a vector that starts as zero, which is then copied to Rd.
  public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int sizeF = op.Size & 1;

      OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

      // Byte count divided by the element size: 4 bytes when sizeF is 0, 8 when it is 1.
      int count = op.GetBytesCount() >> (sizeF + 2);

      for (int i = 0; i < count; i++)
      {
          Operand left = context.VectorExtract(elemType, GetVec(op.Rn), i);
          Operand right = context.VectorExtract(elemType, GetVec(op.Rm), i);

          result = context.VectorInsert(result, emit(left, right), i);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise FP ternary operation through a callback: for each FP element
  // index, emit receives the elements of Rd, Rn and Rm (in that order). The
  // results are inserted into a vector that starts as zero, which is then
  // copied to Rd.
  public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int sizeF = op.Size & 1;

      OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

      // Byte count divided by the element size: 4 bytes when sizeF is 0, 8 when it is 1.
      int count = op.GetBytesCount() >> (sizeF + 2);

      for (int i = 0; i < count; i++)
      {
          Operand acc = context.VectorExtract(elemType, GetVec(op.Rd), i);
          Operand left = context.VectorExtract(elemType, GetVec(op.Rn), i);
          Operand right = context.VectorExtract(elemType, GetVec(op.Rm), i);

          result = context.VectorInsert(result, emit(acc, left, right), i);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise FP binary operation by element: every FP element of Rn is
  // combined, via emit, with the single element op.Index of Rm (re-extracted
  // on each iteration). Results go into a vector that starts as zero, which
  // is then copied to Rd.
  public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

      Operand result = context.VectorZero();

      int sizeF = op.Size & 1;

      OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

      // Byte count divided by the element size: 4 bytes when sizeF is 0, 8 when it is 1.
      int count = op.GetBytesCount() >> (sizeF + 2);

      for (int i = 0; i < count; i++)
      {
          Operand left = context.VectorExtract(elemType, GetVec(op.Rn), i);
          Operand selected = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);

          result = context.VectorInsert(result, emit(left, selected), i);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise FP ternary operation by element: for each FP element index,
  // emit receives the elements of Rd and Rn at that index plus the single
  // element op.Index of Rm (re-extracted on each iteration). Results go into
  // a vector that starts as zero, which is then copied to Rd.
  public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

      Operand result = context.VectorZero();

      int sizeF = op.Size & 1;

      OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

      // Byte count divided by the element size: 4 bytes when sizeF is 0, 8 when it is 1.
      int count = op.GetBytesCount() >> (sizeF + 2);

      for (int i = 0; i < count; i++)
      {
          Operand acc = context.VectorExtract(elemType, GetVec(op.Rd), i);
          Operand left = context.VectorExtract(elemType, GetVec(op.Rn), i);
          Operand selected = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);

          result = context.VectorInsert(result, emit(acc, left, selected), i);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise integer unary operation: each element of Rn is read with
  // EmitVectorExtractSx, passed to emit, and inserted into a vector that
  // starts as zero; that vector is then copied to Rd.
  public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand result = context.VectorZero();

      // Number of elements: byte count divided by (1 << op.Size).
      int count = op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand element = EmitVectorExtractSx(context, op.Rn, i, op.Size);

          result = EmitVectorInsert(context, result, emit(element), i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise integer binary operation: matching elements of Rn and Rm
  // are read with EmitVectorExtractSx, passed to emit, and inserted into a
  // vector that starts as zero; that vector is then copied to Rd.
  public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      // Number of elements: byte count divided by (1 << op.Size).
      int count = op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand left = EmitVectorExtractSx(context, op.Rn, i, op.Size);
          Operand right = EmitVectorExtractSx(context, op.Rm, i, op.Size);

          result = EmitVectorInsert(context, result, emit(left, right), i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise integer ternary operation: matching elements of Rd, Rn and
  // Rm are read with EmitVectorExtractSx and passed to emit in that order.
  // Results go into a vector that starts as zero, which is then copied to Rd.
  public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      // Number of elements: byte count divided by (1 << op.Size).
      int count = op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand acc = EmitVectorExtractSx(context, op.Rd, i, op.Size);
          Operand left = EmitVectorExtractSx(context, op.Rn, i, op.Size);
          Operand right = EmitVectorExtractSx(context, op.Rm, i, op.Size);

          result = EmitVectorInsert(context, result, emit(acc, left, right), i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise integer unary operation: each element of Rn is read with
  // EmitVectorExtractZx, passed to emit, and inserted into a vector that
  // starts as zero; that vector is then copied to Rd.
  public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand result = context.VectorZero();

      // Number of elements: byte count divided by (1 << op.Size).
      int count = op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand element = EmitVectorExtractZx(context, op.Rn, i, op.Size);

          result = EmitVectorInsert(context, result, emit(element), i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise integer binary operation: matching elements of Rn and Rm
  // are read with EmitVectorExtractZx, passed to emit, and inserted into a
  // vector that starts as zero; that vector is then copied to Rd.
  public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      // Number of elements: byte count divided by (1 << op.Size).
      int count = op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand left = EmitVectorExtractZx(context, op.Rn, i, op.Size);
          Operand right = EmitVectorExtractZx(context, op.Rm, i, op.Size);

          result = EmitVectorInsert(context, result, emit(left, right), i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise integer ternary operation: matching elements of Rd, Rn and
  // Rm are read with EmitVectorExtractZx and passed to emit in that order.
  // Results go into a vector that starts as zero, which is then copied to Rd.
  public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      // Number of elements: byte count divided by (1 << op.Size).
      int count = op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand acc = EmitVectorExtractZx(context, op.Rd, i, op.Size);
          Operand left = EmitVectorExtractZx(context, op.Rn, i, op.Size);
          Operand right = EmitVectorExtractZx(context, op.Rm, i, op.Size);

          result = EmitVectorInsert(context, result, emit(acc, left, right), i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise integer binary operation by element: element op.Index of Rm
  // is read once with EmitVectorExtractSx, then combined via emit with every
  // element of Rn (also read with EmitVectorExtractSx). Results go into a
  // vector that starts as zero, which is then copied to Rd.
  public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

      Operand result = context.VectorZero();

      Operand selected = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size);

      // Number of elements: byte count divided by (1 << op.Size).
      int count = op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand left = EmitVectorExtractSx(context, op.Rn, i, op.Size);

          result = EmitVectorInsert(context, result, emit(left, selected), i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise integer binary operation by element: element op.Index of Rm
  // is read once with EmitVectorExtractZx, then combined via emit with every
  // element of Rn (also read with EmitVectorExtractZx). Results go into a
  // vector that starts as zero, which is then copied to Rd.
  public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

      Operand result = context.VectorZero();

      Operand selected = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);

      // Number of elements: byte count divided by (1 << op.Size).
      int count = op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand left = EmitVectorExtractZx(context, op.Rn, i, op.Size);

          result = EmitVectorInsert(context, result, emit(left, selected), i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Element-wise integer ternary operation by element: element op.Index of
  // Rm is read once with EmitVectorExtractZx; for every element index, emit
  // receives the elements of Rd and Rn at that index plus the selected Rm
  // element. Results go into a vector that starts as zero, which is then
  // copied to Rd.
  public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

      Operand result = context.VectorZero();

      Operand selected = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);

      // Number of elements: byte count divided by (1 << op.Size).
      int count = op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand acc = EmitVectorExtractZx(context, op.Rd, i, op.Size);
          Operand left = EmitVectorExtractZx(context, op.Rn, i, op.Size);

          result = EmitVectorInsert(context, result, emit(acc, left, selected), i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Fills every element of the result with emit(immediate): the opcode's
  // Immediate is turned into a constant operand once, and emit is invoked on
  // it for each element. The vector starts as zero and is then copied to Rd.
  public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

      Operand immediate = Const(op.Immediate);

      Operand result = context.VectorZero();

      // Number of elements: byte count divided by (1 << op.Size).
      int count = op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand value = emit(immediate);

          result = EmitVectorInsert(context, result, value, i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Combines every element of Rd (read with EmitVectorExtractZx) with the
  // opcode's Immediate via emit. Results go into a vector that starts as
  // zero, which is then copied back to Rd.
  public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

      Operand immediate = Const(op.Immediate);

      Operand result = context.VectorZero();

      // Number of elements: byte count divided by (1 << op.Size).
      int count = op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand current = EmitVectorExtractZx(context, op.Rd, i, op.Size);

          result = EmitVectorInsert(context, result, emit(current, immediate), i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorWidenRmBinaryOp with signed = true.
  public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenRmBinaryOp(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorWidenRmBinaryOp with signed = false.
  public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenRmBinaryOp(context, emit, signed: false);
  // Binary operation where only Rm is widened: Rn elements are read at size
  // op.Size + 1 and Rm elements at size op.Size, and results are inserted at
  // size op.Size + 1. For a Simd128 register size the Rm elements come from
  // indices starting at 'elems' (the second group of 8 >> op.Size elements),
  // otherwise from index 0. The result vector starts as zero and is copied
  // to Rd.
  private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int count = 8 >> op.Size;

      int rmBase = 0;

      if (op.RegisterSize == RegisterSize.Simd128)
      {
          rmBase = count;
      }

      for (int i = 0; i < count; i++)
      {
          Operand wide = EmitVectorExtract(context, op.Rn, i, op.Size + 1, signed);
          Operand narrow = EmitVectorExtract(context, op.Rm, rmBase + i, op.Size, signed);

          result = EmitVectorInsert(context, result, emit(wide, narrow), i, op.Size + 1);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorWidenRnRmBinaryOp with signed = true.
  public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorWidenRnRmBinaryOp with signed = false.
  public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
  // Binary operation where both sources are widened: Rn and Rm elements are
  // read at size op.Size and results are inserted at size op.Size + 1. For a
  // Simd128 register size the source elements come from indices starting at
  // 'elems' (the second group of 8 >> op.Size elements), otherwise from
  // index 0. The result vector starts as zero and is copied to Rd.
  private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int count = 8 >> op.Size;

      int srcBase = 0;

      if (op.RegisterSize == RegisterSize.Simd128)
      {
          srcBase = count;
      }

      for (int i = 0; i < count; i++)
      {
          Operand left = EmitVectorExtract(context, op.Rn, srcBase + i, op.Size, signed);
          Operand right = EmitVectorExtract(context, op.Rm, srcBase + i, op.Size, signed);

          result = EmitVectorInsert(context, result, emit(left, right), i, op.Size + 1);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorWidenRnRmTernaryOp with signed: true.
  public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
      => EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorWidenRnRmTernaryOp with signed: false.
  public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
      => EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
  // Element-wise ternary operation: combines the current wide element of Rd (op.Size + 1)
  // with narrow elements (op.Size) of Rn and Rm, and stores the wide result back to Rd.
  private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int srcSize = op.Size;
      int dstSize = srcSize + 1;

      // Number of narrow elements that fit in 8 bytes.
      int count = 8 >> srcSize;

      // For Simd128 the narrow sources are read starting at element `count`; otherwise at 0.
      int srcBase = op.RegisterSize == RegisterSize.Simd128 ? count : 0;

      for (int i = 0; i < count; i++)
      {
          int srcIndex = srcBase + i;

          Operand d = EmitVectorExtract(context, op.Rd, i, dstSize, signed);
          Operand n = EmitVectorExtract(context, op.Rn, srcIndex, srcSize, signed);
          Operand m = EmitVectorExtract(context, op.Rm, srcIndex, srcSize, signed);

          Operand value = emit(d, n, m);

          result = EmitVectorInsert(context, result, value, i, dstSize);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorWidenBinaryOpByElem with signed: true.
  public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenBinaryOpByElem(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorWidenBinaryOpByElem with signed: false.
  public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenBinaryOpByElem(context, emit, signed: false);
  // Widening binary operation "by element": every narrow element of Rn is combined with
  // the single Rm element selected by op.Index, and the wide results go to Rd.
  private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
  {
      OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

      Operand result = context.VectorZero();

      // The Rm operand is extracted once, before the loop, and reused for every lane.
      Operand selected = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);

      int srcSize = op.Size;
      int dstSize = srcSize + 1;

      // Number of narrow elements that fit in 8 bytes.
      int count = 8 >> srcSize;

      // For Simd128 Rn is read starting at element `count`; otherwise at 0.
      int rnBase = op.RegisterSize == RegisterSize.Simd128 ? count : 0;

      for (int i = 0; i < count; i++)
      {
          Operand n = EmitVectorExtract(context, op.Rn, rnBase + i, srcSize, signed);

          Operand value = emit(n, selected);

          result = EmitVectorInsert(context, result, value, i, dstSize);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorWidenTernaryOpByElem with signed: true.
  public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
      => EmitVectorWidenTernaryOpByElem(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorWidenTernaryOpByElem with signed: false.
  public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
      => EmitVectorWidenTernaryOpByElem(context, emit, signed: false);
  // Widening ternary operation "by element": each wide element of Rd is combined with a
  // narrow element of Rn and the single Rm element selected by op.Index; results go to Rd.
  private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
  {
      OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

      Operand result = context.VectorZero();

      // The Rm operand is extracted once, before the loop, and reused for every lane.
      Operand selected = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);

      int srcSize = op.Size;
      int dstSize = srcSize + 1;

      // Number of narrow elements that fit in 8 bytes.
      int count = 8 >> srcSize;

      // For Simd128 Rn is read starting at element `count`; otherwise at 0.
      int rnBase = op.RegisterSize == RegisterSize.Simd128 ? count : 0;

      for (int i = 0; i < count; i++)
      {
          Operand d = EmitVectorExtract(context, op.Rd, i, dstSize, signed);
          Operand n = EmitVectorExtract(context, op.Rn, rnBase + i, srcSize, signed);

          Operand value = emit(d, n, selected);

          result = EmitVectorInsert(context, result, value, i, dstSize);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorPairwiseOp with signed: true.
  public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorPairwiseOp(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorPairwiseOp with signed: false.
  public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorPairwiseOp(context, emit, signed: false);
  // Pairwise integer operation: applies `emit` to adjacent element pairs of Rn and of Rm.
  // Results from Rn occupy elements [0, pairs) of Rd and results from Rm occupy
  // elements [pairs, 2 * pairs).
  private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int size = op.Size;
      int pairCount = op.GetPairsCount() >> size;

      for (int i = 0; i < pairCount; i++)
      {
          int first = i * 2;
          int second = first + 1;

          Operand nFirst = EmitVectorExtract(context, op.Rn, first, size, signed);
          Operand nSecond = EmitVectorExtract(context, op.Rn, second, size, signed);

          Operand mFirst = EmitVectorExtract(context, op.Rm, first, size, signed);
          Operand mSecond = EmitVectorExtract(context, op.Rm, second, size, signed);

          Operand fromN = emit(nFirst, nSecond);
          result = EmitVectorInsert(context, result, fromN, i, size);

          Operand fromM = emit(mFirst, mSecond);
          result = EmitVectorInsert(context, result, fromM, pairCount + i, size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Pairwise operation built from x86 shuffles: the inputs are rearranged into a "left"
  // and a "right" vector, which are then combined with the intrinsic picked from `inst`
  // (indexed by element size) and written to Rd.
  public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand n = GetVec(op.Rn);
      Operand m = GetVec(op.Rm);

      Operand left;
      Operand right;
      int instIndex;

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          Operand evenWithZero = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
          Operand oddWithZero = X86GetElements(context, ZeroMask, OddMasks [op.Size]);

          // m:n
          Operand packed = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);

          // 0:even from m:n
          left = context.AddIntrinsic(Intrinsic.X86Pshufb, packed, evenWithZero);

          // 0:odd from m:n
          right = context.AddIntrinsic(Intrinsic.X86Pshufb, packed, oddWithZero);

          instIndex = op.Size;
      }
      else if (op.Size < 3)
      {
          Operand oddEven = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);

          // odd:even from n
          Operand shuffledN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEven);

          // odd:even from m
          Operand shuffledM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEven);

          left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, shuffledN, shuffledM);
          right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, shuffledN, shuffledM);

          instIndex = op.Size;
      }
      else
      {
          // No byte shuffle is needed on this path; the quadwords are unpacked directly.
          left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
          right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);

          instIndex = 3;
      }

      context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[instIndex], left, right));
  }
  // Signed, same-width result: forwards to EmitVectorAcrossVectorOp (signed: true, isLong: false).
  public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
  // Unsigned, same-width result: forwards to EmitVectorAcrossVectorOp (signed: false, isLong: false).
  public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
  // Signed, widened result: forwards to EmitVectorAcrossVectorOp (signed: true, isLong: true).
  public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
  // Unsigned, widened result: forwards to EmitVectorAcrossVectorOp (signed: false, isLong: true).
  public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
  // Reduces all elements of Rn to a single value by repeatedly applying `emit`
  // (left to right, starting from element 0) and stores it in element 0 of an
  // otherwise zeroed Rd. When isLong is set the result is inserted one size step wider.
  private static void EmitVectorAcrossVectorOp(
      ArmEmitterContext context,
      Func2I emit,
      bool signed,
      bool isLong)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      int count = op.GetBytesCount() >> op.Size;

      Operand acc = EmitVectorExtract(context, op.Rn, 0, op.Size, signed);

      for (int i = 1; i < count; i++)
      {
          Operand next = EmitVectorExtract(context, op.Rn, i, op.Size, signed);

          acc = emit(acc, next);
      }

      int dstSize = op.Size;

      if (isLong)
      {
          dstSize++;
      }

      Operand zero = context.VectorZero();
      Operand d = EmitVectorInsert(context, zero, acc, 0, dstSize);

      context.Copy(GetVec(op.Rd), d);
  }
  // Reduces the four FP32 elements of Rn with `emit` (left to right) and writes the
  // result to element 0 of an otherwise zeroed Rd. Only the single-precision,
  // 128-bit form is expected, as checked by the assertion.
  public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
  {
      const int LaneCount = 4;

      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);

      Operand acc = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);

      for (int lane = 1; lane < LaneCount; lane++)
      {
          Operand next = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), lane);

          acc = emit(acc, next);
      }

      Operand zero = context.VectorZero();
      Operand d = context.VectorInsert(zero, acc, 0);

      context.Copy(GetVec(op.Rd), d);
  }
  // Pairwise floating-point operation: applies `emit` to adjacent element pairs of Rn
  // and of Rm. Results from Rn occupy elements [0, pairs) of Rd and results from Rm
  // occupy elements [pairs, 2 * pairs). Bit 0 of op.Size selects FP64 over FP32.
  public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int sizeF = op.Size & 1;

      OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

      // Addition binds tighter than the shift; the parentheses only make that explicit.
      int pairCount = op.GetPairsCount() >> (sizeF + 2);

      for (int i = 0; i < pairCount; i++)
      {
          int first = i * 2;
          int second = first + 1;

          Operand nFirst = context.VectorExtract(elemType, GetVec(op.Rn), first);
          Operand nSecond = context.VectorExtract(elemType, GetVec(op.Rn), second);

          Operand mFirst = context.VectorExtract(elemType, GetVec(op.Rm), first);
          Operand mSecond = context.VectorExtract(elemType, GetVec(op.Rm), second);

          Operand fromN = emit(nFirst, nSecond);
          result = context.VectorInsert(result, fromN, i);

          Operand fromM = emit(mFirst, mSecond);
          result = context.VectorInsert(result, fromM, pairCount + i);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Pairwise floating-point operation built from x86 shuffles. The inputs are split into
  // two vectors (part0 / part1) that are combined with inst32 for single precision or
  // inst64 for double precision; the result is written to Rd.
  public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand n = GetVec(op.Rn);
      Operand m = GetVec(op.Rm);

      bool isDouble = (op.Size & 1) != 0;

      if (isDouble)
      {
          Operand lows = context.AddIntrinsic(Intrinsic.X86Unpcklpd, n, m);
          Operand highs = context.AddIntrinsic(Intrinsic.X86Unpckhpd, n, m);

          context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst64, lows, highs));

          return;
      }

      if (op.RegisterSize == RegisterSize.Simd64)
      {
          Operand interleaved = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);

          Operand zero = context.VectorZero();

          Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, interleaved, zero);
          Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, interleaved);

          context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1));

          return;
      }

      // Remaining case: single precision with a register size other than Simd64.
      // Shuffle immediates: selectors 0 and 2 for the first operand of the pairwise
      // operation, selectors 1 and 3 for the second.
      const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
      const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;

      Operand evens = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm0));
      Operand odds = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm1));

      context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, evens, odds));
  }
  // Comparison predicate codes. The comment above each member records its
  // ordered/unordered and signaling/non-signaling classification.
  public enum CmpCondition
  {
      // ---- Legacy Sse. ----

      // Ordered, non-signaling.
      Equal = 0,

      // Ordered, signaling.
      LessThan = 1,

      // Ordered, signaling.
      LessThanOrEqual = 2,

      // Non-signaling.
      UnorderedQ = 3,

      // Unordered, signaling.
      NotLessThan = 5,

      // Unordered, signaling.
      NotLessThanOrEqual = 6,

      // Non-signaling.
      OrderedQ = 7,

      // ---- Vex. ----

      // Ordered, signaling.
      GreaterThanOrEqual = 13,

      // Ordered, signaling.
      GreaterThan = 14,

      // Signaling.
      OrderedS = 23
  }
  // Bit flags selecting the behaviour of the saturating emit helpers.
  // The last four members are ready-made Scalar/Signed combinations.
  [Flags]
  public enum SaturatingFlags
  {
      // Individual bits.
      Scalar = 0x01,
      Signed = 0x02,
      Add = 0x04,
      Sub = 0x08,
      Accumulate = 0x10,

      // Scalar/vector combined with signed/unsigned.
      ScalarSx = Signed | Scalar,
      ScalarZx = Scalar,
      VectorSx = Signed,
      VectorZx = 0x00
  }
  // Scalar form: forwards to EmitSaturatingUnaryOpSx with SaturatingFlags.ScalarSx.
  public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
      => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.ScalarSx);
  // Vector form: forwards to EmitSaturatingUnaryOpSx with SaturatingFlags.VectorSx.
  public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
      => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx);
  // Signed saturating unary operation. Each sign-extended element of Rn is passed through
  // `emit`; the outcome is saturated via EmitSatQ for element sizes 0..2, or via
  // EmitUnarySignedSatQAbsOrNeg for 64-bit elements, and inserted into Rd.
  // With the Scalar flag only element 0 is processed.
  private static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand result = context.VectorZero();

      bool isScalar = (flags & SaturatingFlags.Scalar) != 0;

      int count = isScalar ? 1 : op.GetBytesCount() >> op.Size;

      for (int i = 0; i < count; i++)
      {
          Operand source = EmitVectorExtractSx(context, op.Rn, i, op.Size);

          // Both saturation paths consume the same intermediate value.
          Operand raw = emit(source);

          Operand saturated = op.Size <= 2
              ? EmitSatQ(context, raw, op.Size, signedSrc: true, signedDst: true)
              : EmitUnarySignedSatQAbsOrNeg(context, raw); // op.Size == 3

          result = EmitVectorInsert(context, result, saturated, i, op.Size);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Scalar signed form: adds SaturatingFlags.ScalarSx to `flags` and forwards to
  // EmitSaturatingBinaryOp without an emit delegate (null is passed).
  public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags)
      => EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarSx | flags);
  // Scalar unsigned form: adds SaturatingFlags.ScalarZx to `flags` and forwards to
  // EmitSaturatingBinaryOp without an emit delegate (null is passed).
  public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
      => EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarZx | flags);
  // Vector signed form: adds SaturatingFlags.VectorSx to `flags` and forwards to
  // EmitSaturatingBinaryOp without an emit delegate (null is passed).
  public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags)
      => EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorSx | flags);
  // Vector unsigned form: adds SaturatingFlags.VectorZx (value 0) to `flags` and forwards
  // to EmitSaturatingBinaryOp without an emit delegate (null is passed).
  public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
      => EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorZx | flags);
  // Emits a saturating binary operation, element by element, and writes the result to Rd.
  //
  // The path taken depends on `flags`:
  //  - Add or Sub (Add wins if both are set): Rn and Rm elements are added/subtracted.
  //    Sizes 0..2 are saturated with EmitSatQ; 64-bit elements use
  //    EmitBinarySatQAdd / EmitBinarySatQSub.
  //  - Accumulate: the Rn element (extracted with the OPPOSITE signedness) is added to the
  //    Rd element. Sizes 0..2 use EmitSatQ; 64-bit elements use EmitBinarySatQAccumulate.
  //  - Otherwise: `emit` is applied to the Rn and Rm elements and the result is saturated
  //    with EmitSatQ. This is the only path that uses `emit`, so it must be non-null here.
  //
  // context: emitter context whose current opcode is an OpCodeSimd (an OpCodeSimdReg for
  //          every path except Accumulate).
  // emit:    element operation for the generic path; may be null for the other paths.
  // flags:   Scalar restricts processing to element 0; Signed selects signed extraction
  //          and a signed saturation target.
  //
  // Throws ArgumentNullException when the generic path is selected and `emit` is null.
  public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand res = context.VectorZero();

      bool scalar = (flags & SaturatingFlags.Scalar) != 0;
      bool signed = (flags & SaturatingFlags.Signed) != 0;

      bool add = (flags & SaturatingFlags.Add) != 0;
      bool sub = (flags & SaturatingFlags.Sub) != 0;

      bool accumulate = (flags & SaturatingFlags.Accumulate) != 0;

      int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

      if (add || sub)
      {
          OpCodeSimdReg opReg = (OpCodeSimdReg)op;

          for (int index = 0; index < elems; index++)
          {
              Operand de;
              Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed);
              Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed);

              if (op.Size <= 2)
              {
                  // Elements were extended to 64 bits, so the intermediate sum/difference
                  // is handed to EmitSatQ as a signed source.
                  Operand temp = add ? context.Add(ne, me) : context.Subtract(ne, me);

                  de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed);
              }
              else if (add) /* if (op.Size == 3) */
              {
                  de = EmitBinarySatQAdd(context, ne, me, signed);
              }
              else /* if (sub) */
              {
                  de = EmitBinarySatQSub(context, ne, me, signed);
              }

              res = EmitVectorInsert(context, res, de, index, op.Size);
          }
      }
      else if (accumulate)
      {
          for (int index = 0; index < elems; index++)
          {
              Operand de;

              // The source operand uses the opposite signedness of the accumulator.
              Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed);
              Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed);

              if (op.Size <= 2)
              {
                  Operand temp = context.Add(ne, me);

                  de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed);
              }
              else /* if (op.Size == 3) */
              {
                  de = EmitBinarySatQAccumulate(context, ne, me, signed);
              }

              res = EmitVectorInsert(context, res, de, index, op.Size);
          }
      }
      else
      {
          // The public Scalar/Vector wrappers always pass a null delegate; fail with a
          // descriptive exception instead of a NullReferenceException inside the loop.
          if (emit == null)
          {
              throw new ArgumentNullException(nameof(emit));
          }

          OpCodeSimdReg opReg = (OpCodeSimdReg)op;

          for (int index = 0; index < elems; index++)
          {
              Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed);
              Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed);

              Operand de = EmitSatQ(context, emit(ne, me), op.Size, true, signed);

              res = EmitVectorInsert(context, res, de, index, op.Size);
          }
      }

      context.Copy(GetVec(op.Rd), res);
  }
  // Bit flags selecting the behaviour of EmitSaturatingNarrowOp.
  // The last six members are ready-made combinations of the three individual bits.
  [Flags]
  public enum SaturatingNarrowFlags
  {
      // Individual bits.
      Scalar = 0x01,
      SignedSrc = 0x02,
      SignedDst = 0x04,

      // Scalar combinations.
      ScalarSxSx = SignedDst | SignedSrc | Scalar,
      ScalarSxZx = SignedSrc | Scalar,
      ScalarZxZx = Scalar,

      // Vector combinations.
      VectorSxSx = SignedDst | SignedSrc,
      VectorSxZx = SignedSrc,
      VectorZxZx = 0x00
  }
  // Saturating narrow: reads wide elements (op.Size + 1) from Rn, saturates each to
  // op.Size with EmitSatQ, and inserts the narrow results into Rd.
  // For a non-scalar Simd128 operation the results are written starting at element
  // `elems` and the existing contents of Rd are kept; otherwise Rd starts from zero.
  public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      bool isScalar = (flags & SaturatingNarrowFlags.Scalar) != 0;
      bool srcIsSigned = (flags & SaturatingNarrowFlags.SignedSrc) != 0;
      bool dstIsSigned = (flags & SaturatingNarrowFlags.SignedDst) != 0;

      int count = isScalar ? 1 : 8 >> op.Size;

      int dstBase = 0;

      if (!isScalar && op.RegisterSize == RegisterSize.Simd128)
      {
          dstBase = count;
      }

      Operand d = GetVec(op.Rd);

      Operand result = dstBase != 0 ? context.Copy(d) : context.VectorZero();

      int wideSize = op.Size + 1;

      for (int i = 0; i < count; i++)
      {
          Operand wide = EmitVectorExtract(context, op.Rn, i, wideSize, srcIsSigned);

          Operand narrow = EmitSatQ(context, wide, op.Size, srcIsSigned, dstIsSigned);

          result = EmitVectorInsert(context, result, narrow, dstBase + i, op.Size);
      }

      context.Copy(d, result);
  }
  // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
  // Emits a call to the SoftFallback saturation routine matching the source/destination
  // signedness, passing `op` and sizeDst. sizeDst must be in the range 0..2.
  public static Operand EmitSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedSrc, bool signedDst)
  {
      // The unsigned cast makes negative sizes fail the same range test.
      if ((uint)sizeDst > 2u)
      {
          throw new ArgumentOutOfRangeException(nameof(sizeDst));
      }

      string methodName;

      if (signedSrc && signedDst)
      {
          methodName = nameof(SoftFallback.SignedSrcSignedDstSatQ);
      }
      else if (signedSrc)
      {
          methodName = nameof(SoftFallback.SignedSrcUnsignedDstSatQ);
      }
      else if (signedDst)
      {
          methodName = nameof(SoftFallback.UnsignedSrcSignedDstSatQ);
      }
      else
      {
          methodName = nameof(SoftFallback.UnsignedSrcUnsignedDstSatQ);
      }

      MethodInfo info = typeof(SoftFallback).GetMethod(methodName);

      return context.Call(info, op, Const(sizeDst));
  }
  // TSrc (64bit) == TDst (64bit); signed.
  // Emits a call to SoftFallback.UnarySignedSatQAbsOrNeg with `op` as its argument.
  // Only valid while the current opcode has element size 3.
  public static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
  {
      Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");

      MethodInfo info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnarySignedSatQAbsOrNeg));

      return context.Call(info, op);
  }
  // TSrcs (64bit) == TDst (64bit); signed, unsigned.
  // Emits a call to SoftFallback.BinarySignedSatQAdd or BinaryUnsignedSatQAdd, chosen by
  // `signed`. Only valid while the current opcode has element size 3.
  public static Operand EmitBinarySatQAdd(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
  {
      Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");

      string methodName = signed
          ? nameof(SoftFallback.BinarySignedSatQAdd)
          : nameof(SoftFallback.BinaryUnsignedSatQAdd);

      MethodInfo info = typeof(SoftFallback).GetMethod(methodName);

      return context.Call(info, op1, op2);
  }
  // TSrcs (64bit) == TDst (64bit); signed, unsigned.
  // Emits a call to SoftFallback.BinarySignedSatQSub or BinaryUnsignedSatQSub, chosen by
  // `signed`. Only valid while the current opcode has element size 3.
  public static Operand EmitBinarySatQSub(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
  {
      Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");

      string methodName = signed
          ? nameof(SoftFallback.BinarySignedSatQSub)
          : nameof(SoftFallback.BinaryUnsignedSatQSub);

      MethodInfo info = typeof(SoftFallback).GetMethod(methodName);

      return context.Call(info, op1, op2);
  }
  // TSrcs (64bit) == TDst (64bit); signed, unsigned.
  // Emits a call to SoftFallback.BinarySignedSatQAcc or BinaryUnsignedSatQAcc, chosen by
  // `signed`. Only valid while the current opcode has element size 3.
  public static Operand EmitBinarySatQAccumulate(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
  {
      Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");

      string methodName = signed
          ? nameof(SoftFallback.BinarySignedSatQAcc)
          : nameof(SoftFallback.BinaryUnsignedSatQAcc);

      MethodInfo info = typeof(SoftFallback).GetMethod(methodName);

      return context.Call(info, op1, op2);
  }
  // Floating-point absolute value: combines `value` with a negative-zero mask using the
  // X86Andnps (single) or X86Andnpd (double) intrinsic.
  // NOTE(review): presumably the intrinsic computes (~mask) & value, clearing the sign
  // bit - confirm against the intrinsic table.
  // single: selects float (true) or double (false) constants and intrinsic.
  // vector: the mask is built with X86GetAllElements (true) or X86GetScalar (false).
  public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
  {
      Operand signMask;
      Intrinsic andNot;

      if (single)
      {
          if (vector)
          {
              signMask = X86GetAllElements(context, -0f);
          }
          else
          {
              signMask = X86GetScalar(context, -0f);
          }

          andNot = Intrinsic.X86Andnps;
      }
      else
      {
          if (vector)
          {
              signMask = X86GetAllElements(context, -0d);
          }
          else
          {
              signMask = X86GetScalar(context, -0d);
          }

          andNot = Intrinsic.X86Andnpd;
      }

      return context.AddIntrinsic(andNot, signMask, value);
  }
  // Sign-extending extract: forwards to EmitVectorExtract with signed: true.
  public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
      => EmitVectorExtract(context, reg, index, size, signed: true);
  // Zero-extending extract: forwards to EmitVectorExtract with signed: false.
  public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
      => EmitVectorExtract(context, reg, index, size, signed: false);
  // Extracts element `index` of vector register `reg`, where `size` is log2 of the
  // element width in bytes (0..3). Elements narrower than 64 bits are sign- or
  // zero-extended to I64 according to `signed`; 64-bit elements are returned as extracted.
  // Throws ArgumentOutOfRangeException (via ThrowIfInvalid) for an invalid size or index.
  public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
  {
      ThrowIfInvalid(index, size);

      Operand vector = GetVec(reg);

      Operand element;

      switch (size)
      {
          case 0:
              element = context.VectorExtract8(vector, index);
              element = signed
                  ? context.SignExtend8 (OperandType.I64, element)
                  : context.ZeroExtend8 (OperandType.I64, element);
              break;

          case 1:
              element = context.VectorExtract16(vector, index);
              element = signed
                  ? context.SignExtend16(OperandType.I64, element)
                  : context.ZeroExtend16(OperandType.I64, element);
              break;

          case 2:
              element = context.VectorExtract(OperandType.I32, vector, index);
              element = signed
                  ? context.SignExtend32(OperandType.I64, element)
                  : context.ZeroExtend32(OperandType.I64, element);
              break;

          default:
              // ThrowIfInvalid has already rejected every size other than 0..3,
              // so this is size 3: a full 64-bit element that needs no extension.
              element = context.VectorExtract(OperandType.I64, vector, index);
              break;
      }

      return element;
  }
  // Inserts `value` as element `index` of `vector` and returns the updated vector.
  // `size` is log2 of the element width in bytes (0..3). For sizes below 3 an I64 value
  // is first converted to I32.
  // Throws ArgumentOutOfRangeException (via ThrowIfInvalid) for an invalid size or index.
  public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
  {
      ThrowIfInvalid(index, size);

      bool needsNarrowing = size < 3 && value.Type == OperandType.I64;

      if (needsNarrowing)
      {
          value = context.ConvertI64ToI32(value);
      }

      if (size == 0)
      {
          return context.VectorInsert8 (vector, value, index);
      }

      if (size == 1)
      {
          return context.VectorInsert16(vector, value, index);
      }

      // Sizes 2 and 3 share the generic insert (ThrowIfInvalid rejected anything else).
      return context.VectorInsert (vector, value, index);
  }
  // Validates an element size / index pair.
  // size must be in 0..3 (log2 of the element width in bytes) and index must be
  // below 16 >> size. Violations throw ArgumentOutOfRangeException naming the offending
  // parameter; size is checked first.
  public static void ThrowIfInvalid(int index, int size)
  {
      // Comparing as unsigned lets a single test reject negative values as well.
      bool sizeInRange = (uint)size <= 3u;

      if (!sizeInRange)
      {
          throw new ArgumentOutOfRangeException(nameof(size));
      }

      uint elementCount = 16u >> size;

      if ((uint)index >= elementCount)
      {
          throw new ArgumentOutOfRangeException(nameof(index));
      }
  }
  1153. }
  1154. }