InstEmitSimdHelper.cs 76 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088
  1. using ARMeilleure.CodeGen.X86;
  2. using ARMeilleure.Decoders;
  3. using ARMeilleure.IntermediateRepresentation;
  4. using ARMeilleure.State;
  5. using ARMeilleure.Translation;
  6. using System;
  7. using System.Diagnostics;
  8. using System.Reflection;
  9. using static ARMeilleure.Instructions.InstEmitHelper;
  10. using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  11. namespace ARMeilleure.Instructions
  12. {
  13. using Func1I = Func<Operand, Operand>;
  14. using Func2I = Func<Operand, Operand, Operand>;
  15. using Func3I = Func<Operand, Operand, Operand, Operand>;
  16. static class InstEmitSimdHelper
  17. {
  18. #region "Masks"
  /// <summary>
  /// Byte-index tables, one 64-bit entry per element size (B, H, S). Each entry packs eight
  /// byte indices, most significant byte first, naming the bytes of the even-numbered elements.
  /// </summary>
  public static readonly long[] EvenMasks =
  {
      0x0E0C0A0806040200L, // B: bytes 14, 12, 10, 8, 6, 4, 2, 0
      0x0D0C090805040100L, // H: bytes 13-12, 9-8, 5-4, 1-0
      0x0B0A090803020100L  // S: bytes 11-8, 3-0
  };
  /// <summary>
  /// Byte-index tables, one 64-bit entry per element size (B, H, S). Each entry packs eight
  /// byte indices, most significant byte first, naming the bytes of the odd-numbered elements.
  /// </summary>
  public static readonly long[] OddMasks =
  {
      0x0F0D0B0907050301L, // B: bytes 15, 13, 11, 9, 7, 5, 3, 1
      0x0F0E0B0A07060302L, // H: bytes 15-14, 11-10, 7-6, 3-2
      0x0F0E0D0C07060504L  // S: bytes 15-12, 7-4
  };
  /// <summary>
  /// 64-bit constant whose eight bytes are all 128 (0x80).
  /// NOTE(review): presumably a byte-shuffle control whose set high bits clear each lane - confirm against callers.
  /// </summary>
  public static readonly long ZeroMask = unchecked((long)0x8080808080808080UL);
  /// <summary>
  /// Builds a 64-bit constant by moving an 8x8 identity bit matrix (one row per byte) by whole
  /// rows: right by <paramref name="shift"/> bytes when it is non-negative, left by its magnitude
  /// in bytes when it is negative.
  /// </summary>
  /// <param name="shift">Signed number of rows to move; 0 yields the identity matrix.</param>
  /// <returns>
  /// The moved matrix, or 0 when the magnitude of <paramref name="shift"/> is 8 or more
  /// (every row has been moved out).
  /// </returns>
  public static ulong X86GetGf2p8LogicalShiftLeft(int shift)
  {
      ulong identity = (0b00000001UL << 56) | (0b00000010UL << 48) | (0b00000100UL << 40) | (0b00001000UL << 32) |
                       (0b00010000UL << 24) | (0b00100000UL << 16) | (0b01000000UL << 8)  | (0b10000000UL << 0);

      // C# only uses the low 6 bits of a 64-bit shift count, so a count of 64 or more would wrap
      // (a shift of 8 rows would return the identity instead of an empty matrix). Handle the
      // "everything shifted out" case explicitly; this also keeps -shift from overflowing.
      if (shift >= 8 || shift <= -8)
      {
          return 0UL;
      }

      return shift >= 0 ? identity >> (shift * 8) : identity << (-shift * 8);
  }
  38. #endregion
  39. #region "X86 SSE Intrinsics"
  /// <summary>Packed integer add intrinsics, ordered by element width: b, w, d, q.</summary>
  public static readonly Intrinsic[] X86PaddInstruction =
  {
      Intrinsic.X86Paddb, // b
      Intrinsic.X86Paddw, // w
      Intrinsic.X86Paddd, // d
      Intrinsic.X86Paddq  // q
  };
  /// <summary>Packed compare-for-equality intrinsics, ordered by element width: b, w, d, q.</summary>
  public static readonly Intrinsic[] X86PcmpeqInstruction =
  {
      Intrinsic.X86Pcmpeqb, // b
      Intrinsic.X86Pcmpeqw, // w
      Intrinsic.X86Pcmpeqd, // d
      Intrinsic.X86Pcmpeqq  // q
  };
  /// <summary>Packed compare-greater-than intrinsics, ordered by element width: b, w, d, q.</summary>
  public static readonly Intrinsic[] X86PcmpgtInstruction =
  {
      Intrinsic.X86Pcmpgtb, // b
      Intrinsic.X86Pcmpgtw, // w
      Intrinsic.X86Pcmpgtd, // d
      Intrinsic.X86Pcmpgtq  // q
  };
  /// <summary>Packed signed maximum intrinsics, ordered by element width: b, w, d (no q entry).</summary>
  public static readonly Intrinsic[] X86PmaxsInstruction =
  {
      Intrinsic.X86Pmaxsb, // b
      Intrinsic.X86Pmaxsw, // w
      Intrinsic.X86Pmaxsd  // d
  };
  /// <summary>Packed unsigned maximum intrinsics, ordered by element width: b, w, d (no q entry).</summary>
  public static readonly Intrinsic[] X86PmaxuInstruction =
  {
      Intrinsic.X86Pmaxub, // b
      Intrinsic.X86Pmaxuw, // w
      Intrinsic.X86Pmaxud  // d
  };
  /// <summary>Packed signed minimum intrinsics, ordered by element width: b, w, d (no q entry).</summary>
  public static readonly Intrinsic[] X86PminsInstruction =
  {
      Intrinsic.X86Pminsb, // b
      Intrinsic.X86Pminsw, // w
      Intrinsic.X86Pminsd  // d
  };
  /// <summary>Packed unsigned minimum intrinsics, ordered by element width: b, w, d (no q entry).</summary>
  public static readonly Intrinsic[] X86PminuInstruction =
  {
      Intrinsic.X86Pminub, // b
      Intrinsic.X86Pminuw, // w
      Intrinsic.X86Pminud  // d
  };
  /// <summary>Sign-extending packed move intrinsics, ordered by source width: bw, wd, dq.</summary>
  public static readonly Intrinsic[] X86PmovsxInstruction =
  {
      Intrinsic.X86Pmovsxbw, // b -> w
      Intrinsic.X86Pmovsxwd, // w -> d
      Intrinsic.X86Pmovsxdq  // d -> q
  };
  /// <summary>Zero-extending packed move intrinsics, ordered by source width: bw, wd, dq.</summary>
  public static readonly Intrinsic[] X86PmovzxInstruction =
  {
      Intrinsic.X86Pmovzxbw, // b -> w
      Intrinsic.X86Pmovzxwd, // w -> d
      Intrinsic.X86Pmovzxdq  // d -> q
  };
  /// <summary>
  /// Packed shift-left-logical intrinsics, ordered by element width: (none), w, d, q.
  /// Slot 0 holds the enum's zero value because no byte-sized variant is listed.
  /// </summary>
  public static readonly Intrinsic[] X86PsllInstruction =
  {
      0,                  // b: placeholder
      Intrinsic.X86Psllw, // w
      Intrinsic.X86Pslld, // d
      Intrinsic.X86Psllq  // q
  };
  /// <summary>
  /// Packed shift-right-arithmetic intrinsics, ordered by element width: (none), w, d.
  /// Slot 0 holds the enum's zero value because no byte-sized variant is listed; there is no q entry.
  /// </summary>
  public static readonly Intrinsic[] X86PsraInstruction =
  {
      0,                  // b: placeholder
      Intrinsic.X86Psraw, // w
      Intrinsic.X86Psrad  // d
  };
  /// <summary>
  /// Packed shift-right-logical intrinsics, ordered by element width: (none), w, d, q.
  /// Slot 0 holds the enum's zero value because no byte-sized variant is listed.
  /// </summary>
  public static readonly Intrinsic[] X86PsrlInstruction =
  {
      0,                  // b: placeholder
      Intrinsic.X86Psrlw, // w
      Intrinsic.X86Psrld, // d
      Intrinsic.X86Psrlq  // q
  };
  /// <summary>Packed integer subtract intrinsics, ordered by element width: b, w, d, q.</summary>
  public static readonly Intrinsic[] X86PsubInstruction =
  {
      Intrinsic.X86Psubb, // b
      Intrinsic.X86Psubw, // w
      Intrinsic.X86Psubd, // d
      Intrinsic.X86Psubq  // q
  };
  /// <summary>Unpack-high (interleave) intrinsics, ordered by element width: bw, wd, dq, qdq.</summary>
  public static readonly Intrinsic[] X86PunpckhInstruction =
  {
      Intrinsic.X86Punpckhbw, // b
      Intrinsic.X86Punpckhwd, // w
      Intrinsic.X86Punpckhdq, // d
      Intrinsic.X86Punpckhqdq // q
  };
  /// <summary>Unpack-low (interleave) intrinsics, ordered by element width: bw, wd, dq, qdq.</summary>
  public static readonly Intrinsic[] X86PunpcklInstruction =
  {
      Intrinsic.X86Punpcklbw, // b
      Intrinsic.X86Punpcklwd, // w
      Intrinsic.X86Punpckldq, // d
      Intrinsic.X86Punpcklqdq // q
  };
  138. #endregion
  /// <summary>
  /// Emits code that reprograms the host floating-point control register from the guest's
  /// flush-to-zero and rounding-mode flags: MXCSR when SSE2 is used, FPCR when AdvSimd is used.
  /// Nothing is emitted when neither optimization is enabled.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="getFpFlag">Callback returning the operand that holds the requested guest FP flag.</param>
  public static void EnterArmFpMode(EmitterContext context, Func<FPState, Operand> getFpFlag)
  {
      if (Optimizations.UseSse2)
      {
          Operand control = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);

          Operand flushToZero = getFpFlag(FPState.FzFlag);
          Operand roundBit0 = getFpFlag(FPState.RMode0Flag);
          Operand roundBit1 = getFpFlag(FPState.RMode1Flag);

          // Clear the flush/denormal and rounding bits before applying the guest settings.
          int clearedBits = ~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo);
          control = context.BitwiseAnd(control, Const(clearedBits));

          int flushBits = (int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm);
          Operand flushSelect = context.ConditionalSelect(flushToZero, Const(flushBits), Const(0));
          control = context.BitwiseOr(control, flushSelect);

          // X86 round modes in order: nearest, negative, positive, zero
          // ARM round modes in order: nearest, positive, negative, zero
          // Read the bits backwards to correct this.
          Operand highSelect = context.ConditionalSelect(roundBit0, Const((int)Mxcsr.Rhi), Const(0));
          control = context.BitwiseOr(control, highSelect);

          Operand lowSelect = context.ConditionalSelect(roundBit1, Const((int)Mxcsr.Rlo), Const(0));
          control = context.BitwiseOr(control, lowSelect);

          context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, control);

          return;
      }

      if (Optimizations.UseAdvSimd)
      {
          Operand control = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);

          Operand flushToZero = getFpFlag(FPState.FzFlag);
          Operand roundBit0 = getFpFlag(FPState.RMode0Flag);
          Operand roundBit1 = getFpFlag(FPState.RMode1Flag);

          // Clear the flush-to-zero and rounding bits before applying the guest settings.
          int clearedBits = ~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1);
          control = context.BitwiseAnd(control, Const(clearedBits));

          Operand flushSelect = context.ConditionalSelect(flushToZero, Const((int)FPCR.Fz), Const(0));
          control = context.BitwiseOr(control, flushSelect);

          Operand round0Select = context.ConditionalSelect(roundBit0, Const((int)FPCR.RMode0), Const(0));
          control = context.BitwiseOr(control, round0Select);

          Operand round1Select = context.ConditionalSelect(roundBit1, Const((int)FPCR.RMode1), Const(0));
          control = context.BitwiseOr(control, round1Select);

          context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, control);

          // TODO: Restore FPSR
      }
  }
  /// <summary>
  /// Emits code that clears the flush-to-zero and rounding-mode bits in the host floating-point
  /// control register: MXCSR when SSE2 is used, FPCR when AdvSimd is used. Nothing is emitted
  /// when neither optimization is enabled.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="setFpFlag">Callback for storing a guest FP flag; not used by the current implementation.</param>
  public static void ExitArmFpMode(EmitterContext context, Action<FPState, Operand> setFpFlag)
  {
      if (Optimizations.UseSse2)
      {
          Operand control = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);

          // Unset round mode (to nearest) and ftz.
          int clearedBits = ~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo);
          control = context.BitwiseAnd(control, Const(clearedBits));

          context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, control);

          // Status flags would be stored here if they were used.
          return;
      }

      if (Optimizations.UseAdvSimd)
      {
          Operand control = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);

          // Unset round mode (to nearest) and fz.
          int clearedBits = ~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1);
          control = context.BitwiseAnd(control, Const(clearedBits));

          context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, control);

          // TODO: Store FPSR
      }
  }
  /// <summary>
  /// Returns the opcode's immediate minus the element width in bits (8 shifted left by the size field).
  /// </summary>
  /// <param name="op">Shift-by-immediate opcode providing <c>Imm</c> and <c>Size</c>.</param>
  /// <returns>The left-shift amount derived from the immediate.</returns>
  public static int GetImmShl(OpCodeSimdShImm op)
  {
      int immediate = op.Imm;
      int elementBits = 8 << op.Size;

      return immediate - elementBits;
  }
  /// <summary>
  /// Returns twice the element width in bits (8 shifted left by size + 1) minus the opcode's immediate.
  /// </summary>
  /// <param name="op">Shift-by-immediate opcode providing <c>Imm</c> and <c>Size</c>.</param>
  /// <returns>The right-shift amount derived from the immediate.</returns>
  public static int GetImmShr(OpCodeSimdShImm op)
  {
      int doubleElementBits = 8 << (op.Size + 1);

      return doubleElementBits - op.Imm;
  }
  /// <summary>
  /// Creates a scalar vector from the raw 32-bit pattern of <paramref name="value"/>
  /// by forwarding to the <c>int</c> overload.
  /// </summary>
  public static Operand X86GetScalar(ArmEmitterContext context, float value)
  {
      int rawBits = BitConverter.SingleToInt32Bits(value);

      return X86GetScalar(context, rawBits);
  }
  /// <summary>
  /// Creates a scalar vector from the raw 64-bit pattern of <paramref name="value"/>
  /// by forwarding to the <c>long</c> overload.
  /// </summary>
  public static Operand X86GetScalar(ArmEmitterContext context, double value)
  {
      long rawBits = BitConverter.DoubleToInt64Bits(value);

      return X86GetScalar(context, rawBits);
  }
  /// <summary>Emits a scalar-vector creation from the 32-bit constant <paramref name="value"/>.</summary>
  public static Operand X86GetScalar(ArmEmitterContext context, int value)
  {
      Operand constant = Const(value);

      return context.VectorCreateScalar(constant);
  }
  /// <summary>Emits a scalar-vector creation from the 64-bit constant <paramref name="value"/>.</summary>
  public static Operand X86GetScalar(ArmEmitterContext context, long value)
  {
      Operand constant = Const(value);

      return context.VectorCreateScalar(constant);
  }
  /// <summary>
  /// Replicates the raw 32-bit pattern of <paramref name="value"/> by forwarding to the <c>int</c> overload.
  /// </summary>
  public static Operand X86GetAllElements(ArmEmitterContext context, float value)
  {
      int rawBits = BitConverter.SingleToInt32Bits(value);

      return X86GetAllElements(context, rawBits);
  }
  /// <summary>
  /// Replicates the raw 64-bit pattern of <paramref name="value"/> by forwarding to the <c>long</c> overload.
  /// </summary>
  public static Operand X86GetAllElements(ArmEmitterContext context, double value)
  {
      long rawBits = BitConverter.DoubleToInt64Bits(value);

      return X86GetAllElements(context, rawBits);
  }
  /// <summary>
  /// Repeats the 16-bit pattern of <paramref name="value"/> four times inside a 64-bit value
  /// and forwards it to the <c>long</c> overload.
  /// </summary>
  public static Operand X86GetAllElements(ArmEmitterContext context, short value)
  {
      ulong lane = (ushort)value;

      // Multiplying by 0x0001_0001_0001_0001 places a copy of the lane in each 16-bit slot;
      // the copies never overlap, so no carries occur.
      ulong replicated = lane * 0x0001000100010001UL;

      return X86GetAllElements(context, (long)replicated);
  }
  /// <summary>
  /// Emits a scalar vector holding <paramref name="value"/> and then a SHUFPS of that vector
  /// with itself using control 0.
  /// </summary>
  public static Operand X86GetAllElements(ArmEmitterContext context, int value)
  {
      Operand scalar = context.VectorCreateScalar(Const(value));

      return context.AddIntrinsic(Intrinsic.X86Shufps, scalar, scalar, Const(0));
  }
  /// <summary>
  /// Emits a scalar vector holding <paramref name="value"/> and then a MOVLHPS of that vector
  /// with itself.
  /// </summary>
  public static Operand X86GetAllElements(ArmEmitterContext context, long value)
  {
      Operand scalar = context.VectorCreateScalar(Const(value));

      return context.AddIntrinsic(Intrinsic.X86Movlhps, scalar, scalar);
  }
  /// <summary>Signed convenience overload: reinterprets both values as <c>ulong</c> and forwards them.</summary>
  public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0)
  {
      ulong upper = (ulong)e1;
      ulong lower = (ulong)e0;

      return X86GetElements(context, upper, lower);
  }
  /// <summary>
  /// Emits two scalar vectors, one for <paramref name="e0"/> and one for <paramref name="e1"/>,
  /// and combines them with PUNPCKLQDQ (the <paramref name="e0"/> vector is the first operand).
  /// </summary>
  public static Operand X86GetElements(ArmEmitterContext context, ulong e1, ulong e0)
  {
      Operand lowScalar = context.VectorCreateScalar(Const(e0));
      Operand highScalar = context.VectorCreateScalar(Const(e1));

      return context.AddIntrinsic(Intrinsic.X86Punpcklqdq, lowScalar, highScalar);
  }
  /// <summary>
  /// Maps a guest rounding mode to an integer round-control value: 8 OR-ed with a two-bit mode
  /// (nearest = 0, minus infinity = 1, plus infinity = 2, zero = 3).
  /// NOTE(review): the constant 8 is presumably the "suppress precision exception" bit of the
  /// SSE4.1 ROUND immediate - confirm.
  /// </summary>
  /// <param name="roundMode">Rounding mode to translate.</param>
  /// <returns>The round-control value.</returns>
  /// <exception cref="ArgumentException">The mode is not one of the four handled values.</exception>
  public static int X86GetRoundControl(FPRoundingMode roundMode)
  {
      int modeBits;

      switch (roundMode)
      {
          case FPRoundingMode.ToNearest:
              modeBits = 0; // even
              break;
          case FPRoundingMode.TowardsPlusInfinity:
              modeBits = 2;
              break;
          case FPRoundingMode.TowardsMinusInfinity:
              modeBits = 1;
              break;
          case FPRoundingMode.TowardsZero:
              modeBits = 3;
              break;
          default:
              throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".");
      }

      return 8 | modeBits;
  }
  /// <summary>
  /// Emits a round-to-nearest, ties-away-from-zero sequence: the largest value below 0.5,
  /// carrying the sign of the input, is added to the input and the sum is rounded towards zero.
  /// The single or double precision form is chosen from bit 0 of the current opcode's size.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="n">V128 operand to round.</param>
  /// <param name="scalar">True to use the scalar instruction forms, false for the packed forms.</param>
  /// <returns>The operand holding the rounded value.</returns>
  public static Operand EmitSse41RoundToNearestWithTiesToAwayOpF(ArmEmitterContext context, Operand n, bool scalar)
  {
      Debug.Assert(n.Type == OperandType.V128);

      Operand value = context.Copy(n);
      Operand truncateControl = Const(X86GetRoundControl(FPRoundingMode.TowardsZero));

      IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;

      if (isDouble)
      {
          Intrinsic addInst = scalar ? Intrinsic.X86Addsd : Intrinsic.X86Addpd;
          Intrinsic roundInst = scalar ? Intrinsic.X86Roundsd : Intrinsic.X86Roundpd;

          // Isolate the sign bit of the input.
          Operand sign = scalar ? X86GetScalar(context, long.MinValue) : X86GetAllElements(context, long.MinValue);
          sign = context.AddIntrinsic(Intrinsic.X86Pand, sign, value);

          // 0x3FDFFFFFFFFFFFFFL == BitConverter.DoubleToInt64Bits(0.5d) - 1L
          Operand bias = scalar ? X86GetScalar(context, 0x3FDFFFFFFFFFFFFFL) : X86GetAllElements(context, 0x3FDFFFFFFFFFFFFFL);
          bias = context.AddIntrinsic(Intrinsic.X86Por, bias, sign);

          value = context.AddIntrinsic(addInst, value, bias);
          value = context.AddIntrinsic(roundInst, value, truncateControl);
      }
      else
      {
          Intrinsic addInst = scalar ? Intrinsic.X86Addss : Intrinsic.X86Addps;
          Intrinsic roundInst = scalar ? Intrinsic.X86Roundss : Intrinsic.X86Roundps;

          // Isolate the sign bit of the input.
          Operand sign = scalar ? X86GetScalar(context, int.MinValue) : X86GetAllElements(context, int.MinValue);
          sign = context.AddIntrinsic(Intrinsic.X86Pand, sign, value);

          // 0x3EFFFFFF == BitConverter.SingleToInt32Bits(0.5f) - 1
          Operand bias = scalar ? X86GetScalar(context, 0x3EFFFFFF) : X86GetAllElements(context, 0x3EFFFFFF);
          bias = context.AddIntrinsic(Intrinsic.X86Por, bias, sign);

          value = context.AddIntrinsic(addInst, value, bias);
          value = context.AddIntrinsic(roundInst, value, truncateControl);
      }

      return value;
  }
  /// <summary>
  /// Emits a branch-free population count of the low 8 bits of <paramref name="op"/>
  /// ("size" is 8, SIMD&amp;FP instructions): pairs, then nibbles, then the whole byte.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="op">I32 or I64 operand holding the byte to count.</param>
  /// <returns>Operand of the same type holding the number of set bits.</returns>
  public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op)
  {
      Debug.Assert(op.Type == OperandType.I32 || op.Type == OperandType.I64);

      // Step 1: 2-bit sums (x - ((x >> 1) & 0x55)).
      Operand oddBits = context.BitwiseAnd(context.ShiftRightUI(op, Const(1)), Const(op.Type, 0x55L));
      Operand pairSums = context.Subtract(op, oddBits);

      // Step 2: 4-bit sums.
      Operand pairMask = Const(op.Type, 0x33L);
      Operand highPairs = context.BitwiseAnd(context.ShiftRightUI(pairSums, Const(2)), pairMask);
      Operand lowPairs = context.BitwiseAnd(pairSums, pairMask);
      Operand nibbleSums = context.Add(highPairs, lowPairs);

      // Step 3: add both nibbles and keep the low four bits.
      Operand highNibble = context.ShiftRightUI(nibbleSums, Const(4));
      Operand total = context.Add(nibbleSums, highNibble);

      return context.BitwiseAnd(total, Const(op.Type, 0x0fL));
  }
  /// <summary>
  /// Emits a scalar unary floating-point intrinsic on Rn and writes the result to Rd, zeroing
  /// everything above the scalar (upper 64 bits for double, upper 96 bits for single).
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="inst32">Intrinsic used when bit 0 of the opcode size is clear.</param>
  /// <param name="inst64">Intrinsic used when bit 0 of the opcode size is set.</param>
  public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand source = GetVec(op.Rn);

      bool isDouble = (op.Size & 1) != 0;

      Operand result = context.AddIntrinsic(isDouble ? inst64 : inst32, source);

      result = isDouble
          ? context.VectorZeroUpper64(result)
          : context.VectorZeroUpper96(result);

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Emits a scalar binary floating-point intrinsic on Rn and Rm and writes the result to Rd,
  /// zeroing everything above the scalar (upper 64 bits for double, upper 96 bits for single).
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="inst32">Intrinsic used when bit 0 of the opcode size is clear.</param>
  /// <param name="inst64">Intrinsic used when bit 0 of the opcode size is set.</param>
  public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand left = GetVec(op.Rn);
      Operand right = GetVec(op.Rm);

      bool isDouble = (op.Size & 1) != 0;

      Operand result = context.AddIntrinsic(isDouble ? inst64 : inst32, left, right);

      result = isDouble
          ? context.VectorZeroUpper64(result)
          : context.VectorZeroUpper96(result);

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Emits a packed unary floating-point intrinsic on Rn and writes the result to Rd, zeroing
  /// the upper 64 bits when the opcode's register size is Simd64.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="inst32">Intrinsic used when bit 0 of the opcode size is clear.</param>
  /// <param name="inst64">Intrinsic used when bit 0 of the opcode size is set.</param>
  public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand source = GetVec(op.Rn);

      bool isDouble = (op.Size & 1) != 0;

      Operand result = context.AddIntrinsic(isDouble ? inst64 : inst32, source);

      bool halfWidth = op.RegisterSize == RegisterSize.Simd64;

      if (halfWidth)
      {
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Emits a packed binary floating-point intrinsic on Rn and Rm and writes the result to Rd,
  /// zeroing the upper 64 bits when the opcode's register size is Simd64.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="inst32">Intrinsic used when bit 0 of the opcode size is clear.</param>
  /// <param name="inst64">Intrinsic used when bit 0 of the opcode size is set.</param>
  public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand left = GetVec(op.Rn);
      Operand right = GetVec(op.Rm);

      bool isDouble = (op.Size & 1) != 0;

      Operand result = context.AddIntrinsic(isDouble ? inst64 : inst32, left, right);

      bool halfWidth = op.RegisterSize == RegisterSize.Simd64;

      if (halfWidth)
      {
          result = context.VectorZeroUpper64(result);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Emits a call to the one-argument method called <paramref name="name"/> on <see cref="MathF"/>
  /// (float argument) or <see cref="Math"/> (double argument), chosen from bit 0 of the current
  /// opcode's size.
  /// </summary>
  /// <param name="context">Emitter that receives the call.</param>
  /// <param name="name">Name of the math method to look up by reflection.</param>
  /// <param name="n">Argument operand.</param>
  /// <returns>The operand holding the call result.</returns>
  public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n)
  {
      IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

      Type mathType;
      Type argumentType;

      if ((op.Size & 1) == 0)
      {
          mathType = typeof(MathF);
          argumentType = typeof(float);
      }
      else
      {
          mathType = typeof(Math);
          argumentType = typeof(double);
      }

      MethodInfo target = mathType.GetMethod(name, new Type[] { argumentType });

      return context.Call(target, n);
  }
  /// <summary>
  /// Emits a call to <c>Round(value, MidpointRounding)</c> on <see cref="MathF"/> (float) or
  /// <see cref="Math"/> (double), chosen from bit 0 of the current opcode's size.
  /// </summary>
  /// <param name="context">Emitter that receives the call.</param>
  /// <param name="roundMode">Midpoint rounding mode, passed to the call as an integer constant.</param>
  /// <param name="n">Operand to round.</param>
  /// <returns>The operand holding the call result.</returns>
  public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
  {
      IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

      string methodName = nameof(Math.Round);

      Type mathType;
      Type valueType;

      if ((op.Size & 1) == 0)
      {
          mathType = typeof(MathF);
          valueType = typeof(float);
      }
      else
      {
          mathType = typeof(Math);
          valueType = typeof(double);
      }

      MethodInfo target = mathType.GetMethod(methodName, new Type[] { valueType, typeof(MidpointRounding) });

      return context.Call(target, n, Const((int)roundMode));
  }
  /// <summary>
  /// Emits code that combines the two guest rounding-mode flags into one value:
  /// (RMode1 shifted left by 1) OR RMode0.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <returns>The operand holding the combined rounding mode.</returns>
  public static Operand EmitGetRoundingMode(ArmEmitterContext context)
  {
      Operand highFlag = GetFpFlag(FPState.RMode1Flag);
      Operand highBit = context.ShiftLeft(highFlag, Const(1));

      Operand lowBit = GetFpFlag(FPState.RMode0Flag);

      return context.BitwiseOr(highBit, lowBit);
  }
  /// <summary>
  /// Emits a run-time dispatch on the guest rounding mode that rounds <paramref name="op"/> with
  /// the matching managed math call: Round (to even), Ceiling, Floor or Truncate.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="op">FP32 or FP64 operand to round.</param>
  /// <returns>A local of the same type that receives the rounded value on every path.</returns>
  public static Operand EmitRoundByRMode(ArmEmitterContext context, Operand op)
  {
      Debug.Assert(op.Type == OperandType.FP32 || op.Type == OperandType.FP64);

      Operand checkPlusInfinity = Label();
      Operand checkMinusInfinity = Label();
      Operand towardsZero = Label();
      Operand done = Label();

      Operand nearestMode = Const((int)FPRoundingMode.ToNearest);
      Operand plusInfinityMode = Const((int)FPRoundingMode.TowardsPlusInfinity);
      Operand minusInfinityMode = Const((int)FPRoundingMode.TowardsMinusInfinity);

      Operand rounded = context.AllocateLocal(op.Type);

      Operand currentMode = EmitGetRoundingMode(context);

      // To nearest: round with ties to even.
      context.BranchIf(checkPlusInfinity, currentMode, nearestMode, Comparison.NotEqual);
      context.Copy(rounded, EmitRoundMathCall(context, MidpointRounding.ToEven, op));
      context.Branch(done);

      // Towards plus infinity: ceiling.
      context.MarkLabel(checkPlusInfinity);
      context.BranchIf(checkMinusInfinity, currentMode, plusInfinityMode, Comparison.NotEqual);
      context.Copy(rounded, EmitUnaryMathCall(context, nameof(Math.Ceiling), op));
      context.Branch(done);

      // Towards minus infinity: floor.
      context.MarkLabel(checkMinusInfinity);
      context.BranchIf(towardsZero, currentMode, minusInfinityMode, Comparison.NotEqual);
      context.Copy(rounded, EmitUnaryMathCall(context, nameof(Math.Floor), op));
      context.Branch(done);

      // Remaining mode: truncate.
      context.MarkLabel(towardsZero);
      context.Copy(rounded, EmitUnaryMathCall(context, nameof(Math.Truncate), op));
      context.Branch(done);

      context.MarkLabel(done);

      return rounded;
  }
  /// <summary>
  /// Emits a call to the method called <paramref name="name"/> on <c>SoftFloat32</c> or
  /// <c>SoftFloat64</c> (chosen from bit 0 of the current opcode's size). The call is bracketed
  /// by leaving ARM FP mode and storing the context before it, and loading the context and
  /// re-entering ARM FP mode after it.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="name">Name of the soft-float method to look up by reflection.</param>
  /// <param name="callArgs">Operands passed to the call.</param>
  /// <returns>The operand holding the call result.</returns>
  public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs)
  {
      IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

      Type softFloatType = (op.Size & 1) == 0 ? typeof(SoftFloat32) : typeof(SoftFloat64);

      MethodInfo target = softFloatType.GetMethod(name);

      context.ExitArmFpMode();
      context.StoreToContext();

      Operand callResult = context.Call(target, callArgs);

      context.LoadFromContext();
      context.EnterArmFpMode();

      return callResult;
  }
  /// <summary>
  /// Scalar floating-point binary operation by element: applies <paramref name="emit"/> to
  /// element 0 of Rn and element <c>op.Index</c> of Rm, then stores the result in element 0 of
  /// an otherwise zeroed Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand from the two inputs.</param>
  public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;

      Operand left = context.VectorExtract(elemType, GetVec(op.Rn), 0);
      Operand right = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);

      Operand destination = GetVec(op.Rd);
      Operand zeroed = context.VectorZero();
      Operand value = emit(left, right);

      context.Copy(destination, context.VectorInsert(zeroed, value, 0));
  }
  /// <summary>
  /// Scalar floating-point ternary operation by element: applies <paramref name="emit"/> to
  /// element 0 of Rd, element 0 of Rn and element <c>op.Index</c> of Rm, then stores the result
  /// in element 0 of an otherwise zeroed Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand from the three inputs.</param>
  public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;

      Operand accumulator = context.VectorExtract(elemType, GetVec(op.Rd), 0);
      Operand left = context.VectorExtract(elemType, GetVec(op.Rn), 0);
      Operand right = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);

      Operand destination = GetVec(op.Rd);
      Operand zeroed = context.VectorZero();
      Operand value = emit(accumulator, left, right);

      context.Copy(destination, context.VectorInsert(zeroed, value, 0));
  }
  /// <summary>
  /// Scalar integer unary operation on a sign-extended input: applies <paramref name="emit"/>
  /// to element 0 of Rn and stores the result in element 0 of an otherwise zeroed Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand from the input.</param>
  public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand input = EmitVectorExtractSx(context, op.Rn, 0, op.Size);

      Operand zeroed = context.VectorZero();
      Operand value = emit(input);
      Operand result = EmitVectorInsert(context, zeroed, value, 0, op.Size);

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Scalar integer binary operation on sign-extended inputs: applies <paramref name="emit"/>
  /// to element 0 of Rn and Rm and stores the result in element 0 of an otherwise zeroed Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand from the two inputs.</param>
  public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand left = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
      Operand right = EmitVectorExtractSx(context, op.Rm, 0, op.Size);

      Operand zeroed = context.VectorZero();
      Operand value = emit(left, right);
      Operand result = EmitVectorInsert(context, zeroed, value, 0, op.Size);

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Scalar integer unary operation on a zero-extended input: applies <paramref name="emit"/>
  /// to element 0 of Rn and stores the result in element 0 of an otherwise zeroed Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand from the input.</param>
  public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand input = EmitVectorExtractZx(context, op.Rn, 0, op.Size);

      Operand zeroed = context.VectorZero();
      Operand value = emit(input);
      Operand result = EmitVectorInsert(context, zeroed, value, 0, op.Size);

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Scalar integer binary operation on zero-extended inputs: applies <paramref name="emit"/>
  /// to element 0 of Rn and Rm and stores the result in element 0 of an otherwise zeroed Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand from the two inputs.</param>
  public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand left = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
      Operand right = EmitVectorExtractZx(context, op.Rm, 0, op.Size);

      Operand zeroed = context.VectorZero();
      Operand value = emit(left, right);
      Operand result = EmitVectorInsert(context, zeroed, value, 0, op.Size);

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Scalar integer ternary operation on zero-extended inputs: applies <paramref name="emit"/>
  /// to element 0 of Rd, Rn and Rm and stores the result in element 0 of an otherwise zeroed Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand from the three inputs.</param>
  public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand accumulator = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
      Operand left = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
      Operand right = EmitVectorExtractZx(context, op.Rm, 0, op.Size);

      Operand zeroed = context.VectorZero();
      Operand value = emit(accumulator, left, right);
      Operand result = EmitVectorInsert(context, zeroed, value, 0, op.Size);

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Scalar floating-point unary operation: applies <paramref name="emit"/> to element 0 of Rn
  /// (FP64 when bit 0 of the size is set, FP32 otherwise) and stores the result in element 0 of
  /// an otherwise zeroed Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand from the input.</param>
  public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;

      Operand input = context.VectorExtract(elemType, GetVec(op.Rn), 0);

      Operand destination = GetVec(op.Rd);
      Operand zeroed = context.VectorZero();
      Operand value = emit(input);

      context.Copy(destination, context.VectorInsert(zeroed, value, 0));
  }
  /// <summary>
  /// Scalar floating-point binary operation: applies <paramref name="emit"/> to element 0 of Rn
  /// and Rm (FP64 when bit 0 of the size is set, FP32 otherwise) and stores the result in
  /// element 0 of an otherwise zeroed Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand from the two inputs.</param>
  public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;

      Operand left = context.VectorExtract(elemType, GetVec(op.Rn), 0);
      Operand right = context.VectorExtract(elemType, GetVec(op.Rm), 0);

      Operand destination = GetVec(op.Rd);
      Operand zeroed = context.VectorZero();
      Operand value = emit(left, right);

      context.Copy(destination, context.VectorInsert(zeroed, value, 0));
  }
  /// <summary>
  /// Scalar floating-point ternary operation with a separate addend register: applies
  /// <paramref name="emit"/> to element 0 of Ra, Rn and Rm (FP64 when bit 0 of the size is set,
  /// FP32 otherwise) and stores the result in element 0 of an otherwise zeroed Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand from (Ra, Rn, Rm).</param>
  public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      bool isDouble = (op.Size & 1) != 0;
      OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;

      Operand addend = context.VectorExtract(elemType, GetVec(op.Ra), 0);
      Operand left = context.VectorExtract(elemType, GetVec(op.Rn), 0);
      Operand right = context.VectorExtract(elemType, GetVec(op.Rm), 0);

      Operand destination = GetVec(op.Rd);
      Operand zeroed = context.VectorZero();
      Operand value = emit(addend, left, right);

      context.Copy(destination, context.VectorInsert(zeroed, value, 0));
  }
  /// <summary>
  /// Vector floating-point unary operation: applies <paramref name="emit"/> to every element of
  /// Rn and writes the results, element by element, into a zero-initialised vector copied to Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand for one element.</param>
  public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd op = (OpCodeSimd)context.CurrOp;

      Operand result = context.VectorZero();

      bool isDouble = (op.Size & 1) != 0;
      OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;

      // Element size is 4 bytes (shift by 2) for single, 8 bytes (shift by 3) for double.
      int elemCount = op.GetBytesCount() >> (isDouble ? 3 : 2);

      for (int i = 0; i < elemCount; i++)
      {
          Operand input = context.VectorExtract(elemType, GetVec(op.Rn), i);
          Operand value = emit(input);

          result = context.VectorInsert(result, value, i);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Vector floating-point binary operation: applies <paramref name="emit"/> to each pair of
  /// corresponding elements of Rn and Rm and writes the results into a zero-initialised vector
  /// copied to Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand for one element pair.</param>
  public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      bool isDouble = (op.Size & 1) != 0;
      OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;

      // Element size is 4 bytes (shift by 2) for single, 8 bytes (shift by 3) for double.
      int elemCount = op.GetBytesCount() >> (isDouble ? 3 : 2);

      for (int i = 0; i < elemCount; i++)
      {
          Operand left = context.VectorExtract(elemType, GetVec(op.Rn), i);
          Operand right = context.VectorExtract(elemType, GetVec(op.Rm), i);
          Operand value = emit(left, right);

          result = context.VectorInsert(result, value, i);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  /// <summary>
  /// Vector floating-point ternary operation: applies <paramref name="emit"/> to each triple of
  /// corresponding elements of Rd, Rn and Rm and writes the results into a zero-initialised
  /// vector copied to Rd.
  /// </summary>
  /// <param name="context">Emitter that receives the generated operations.</param>
  /// <param name="emit">Callback producing the result operand from (Rd, Rn, Rm) elements.</param>
  public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      bool isDouble = (op.Size & 1) != 0;
      OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;

      // Element size is 4 bytes (shift by 2) for single, 8 bytes (shift by 3) for double.
      int elemCount = op.GetBytesCount() >> (isDouble ? 3 : 2);

      for (int i = 0; i < elemCount; i++)
      {
          Operand accumulator = context.VectorExtract(elemType, GetVec(op.Rd), i);
          Operand left = context.VectorExtract(elemType, GetVec(op.Rn), i);
          Operand right = context.VectorExtract(elemType, GetVec(op.Rm), i);
          Operand value = emit(accumulator, left, right);

          result = context.VectorInsert(result, value, i);
      }

      context.Copy(GetVec(op.Rd), result);
  }
  // Floating-point binary operation "by element": every element of Rn is combined, via
  // `emit`, with the single Rm element selected by op.Index. Results are written to the
  // matching lanes of Rd. Elements are FP64 when bit 0 of op.Size is set, else FP32.
  public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

      Operand res = context.VectorZero();

      int sizeF = op.Size & 1;

      OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

      // Rm[Index] does not depend on the lane being processed, so it is extracted once
      // up front instead of once per iteration (as the integer ByElem helpers do).
      Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);

      int elems = op.GetBytesCount() >> (sizeF + 2);

      for (int index = 0; index < elems; index++)
      {
          Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);

          res = context.VectorInsert(res, emit(ne, me), index);
      }

      context.Copy(GetVec(op.Rd), res);
  }
  // Floating-point ternary operation "by element": for every lane, `emit` receives the
  // current Rd element, the matching Rn element and the single Rm element selected by
  // op.Index; its result replaces that lane in Rd. Elements are FP64 when bit 0 of
  // op.Size is set, else FP32.
  public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

      Operand res = context.VectorZero();

      int sizeF = op.Size & 1;

      OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

      // Rm[Index] does not depend on the lane being processed, so it is extracted once
      // up front instead of once per iteration (as the integer ByElem helpers do).
      Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);

      int elems = op.GetBytesCount() >> (sizeF + 2);

      for (int index = 0; index < elems; index++)
      {
          Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
          Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);

          res = context.VectorInsert(res, emit(de, ne, me), index);
      }

      context.Copy(GetVec(op.Rd), res);
  }
  // Integer unary operation over all lanes: each Rn element is read with
  // EmitVectorExtractSx, passed to `emit`, and the result inserted into the same lane
  // of Rd. The lane count is the register byte count shifted right by op.Size.
  public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

      Operand result = context.VectorZero();

      int size = simdOp.Size;
      int laneCount = simdOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand src = EmitVectorExtractSx(context, simdOp.Rn, lane, size);
          Operand value = emit(src);

          result = EmitVectorInsert(context, result, value, lane, size);
      }

      context.Copy(GetVec(simdOp.Rd), result);
  }
  // Integer binary operation over all lanes: matching Rn and Rm elements are read with
  // EmitVectorExtractSx, combined by `emit`, and the result inserted into the same lane
  // of Rd.
  public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int size = regOp.Size;
      int laneCount = regOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand lhs = EmitVectorExtractSx(context, regOp.Rn, lane, size);
          Operand rhs = EmitVectorExtractSx(context, regOp.Rm, lane, size);

          Operand value = emit(lhs, rhs);

          result = EmitVectorInsert(context, result, value, lane, size);
      }

      context.Copy(GetVec(regOp.Rd), result);
  }
  // Integer ternary operation over all lanes: the current Rd element and the matching
  // Rn and Rm elements are read with EmitVectorExtractSx, combined by `emit`, and the
  // result replaces that lane in Rd.
  public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int size = regOp.Size;
      int laneCount = regOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand acc = EmitVectorExtractSx(context, regOp.Rd, lane, size);
          Operand lhs = EmitVectorExtractSx(context, regOp.Rn, lane, size);
          Operand rhs = EmitVectorExtractSx(context, regOp.Rm, lane, size);

          Operand value = emit(acc, lhs, rhs);

          result = EmitVectorInsert(context, result, value, lane, size);
      }

      context.Copy(GetVec(regOp.Rd), result);
  }
  // Integer unary operation over all lanes: each Rn element is read with
  // EmitVectorExtractZx, passed to `emit`, and the result inserted into the same lane
  // of Rd.
  public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

      Operand result = context.VectorZero();

      int size = simdOp.Size;
      int laneCount = simdOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand src = EmitVectorExtractZx(context, simdOp.Rn, lane, size);
          Operand value = emit(src);

          result = EmitVectorInsert(context, result, value, lane, size);
      }

      context.Copy(GetVec(simdOp.Rd), result);
  }
  // Integer binary operation over all lanes: matching Rn and Rm elements are read with
  // EmitVectorExtractZx, combined by `emit`, and the result inserted into the same lane
  // of Rd.
  public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int size = regOp.Size;
      int laneCount = regOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand lhs = EmitVectorExtractZx(context, regOp.Rn, lane, size);
          Operand rhs = EmitVectorExtractZx(context, regOp.Rm, lane, size);

          Operand value = emit(lhs, rhs);

          result = EmitVectorInsert(context, result, value, lane, size);
      }

      context.Copy(GetVec(regOp.Rd), result);
  }
  // Integer ternary operation over all lanes: the current Rd element and the matching
  // Rn and Rm elements are read with EmitVectorExtractZx, combined by `emit`, and the
  // result replaces that lane in Rd.
  public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int size = regOp.Size;
      int laneCount = regOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand acc = EmitVectorExtractZx(context, regOp.Rd, lane, size);
          Operand lhs = EmitVectorExtractZx(context, regOp.Rn, lane, size);
          Operand rhs = EmitVectorExtractZx(context, regOp.Rm, lane, size);

          Operand value = emit(acc, lhs, rhs);

          result = EmitVectorInsert(context, result, value, lane, size);
      }

      context.Copy(GetVec(regOp.Rd), result);
  }
  // Integer binary operation "by element": the Rm element selected by op.Index is read
  // once with EmitVectorExtractSx and combined, via `emit`, with every Rn element (also
  // read with EmitVectorExtractSx). Results go to the matching lanes of Rd.
  public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdRegElem elemOp = (OpCodeSimdRegElem)context.CurrOp;

      Operand result = context.VectorZero();

      int size = elemOp.Size;

      Operand selected = EmitVectorExtractSx(context, elemOp.Rm, elemOp.Index, size);

      int laneCount = elemOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand src = EmitVectorExtractSx(context, elemOp.Rn, lane, size);
          Operand value = emit(src, selected);

          result = EmitVectorInsert(context, result, value, lane, size);
      }

      context.Copy(GetVec(elemOp.Rd), result);
  }
  // Integer binary operation "by element": the Rm element selected by op.Index is read
  // once with EmitVectorExtractZx and combined, via `emit`, with every Rn element (also
  // read with EmitVectorExtractZx). Results go to the matching lanes of Rd.
  public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdRegElem elemOp = (OpCodeSimdRegElem)context.CurrOp;

      Operand result = context.VectorZero();

      int size = elemOp.Size;

      Operand selected = EmitVectorExtractZx(context, elemOp.Rm, elemOp.Index, size);

      int laneCount = elemOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand src = EmitVectorExtractZx(context, elemOp.Rn, lane, size);
          Operand value = emit(src, selected);

          result = EmitVectorInsert(context, result, value, lane, size);
      }

      context.Copy(GetVec(elemOp.Rd), result);
  }
  // Integer ternary operation "by element": for every lane, `emit` receives the current
  // Rd element, the matching Rn element and the Rm element selected by op.Index (read
  // once, before the loop). All reads use EmitVectorExtractZx.
  public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
  {
      OpCodeSimdRegElem elemOp = (OpCodeSimdRegElem)context.CurrOp;

      Operand result = context.VectorZero();

      int size = elemOp.Size;

      Operand selected = EmitVectorExtractZx(context, elemOp.Rm, elemOp.Index, size);

      int laneCount = elemOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand acc = EmitVectorExtractZx(context, elemOp.Rd, lane, size);
          Operand src = EmitVectorExtractZx(context, elemOp.Rn, lane, size);

          Operand value = emit(acc, src, selected);

          result = EmitVectorInsert(context, result, value, lane, size);
      }

      context.Copy(GetVec(elemOp.Rd), result);
  }
  // Fills every lane of Rd with `emit(imm)`, where imm is a constant built from
  // op.Immediate. `emit` is invoked once per lane.
  public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
  {
      OpCodeSimdImm immOp = (OpCodeSimdImm)context.CurrOp;

      Operand immediate = Const(immOp.Immediate);

      Operand result = context.VectorZero();

      int size = immOp.Size;
      int laneCount = immOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand value = emit(immediate);

          result = EmitVectorInsert(context, result, value, lane, size);
      }

      context.Copy(GetVec(immOp.Rd), result);
  }
  // Combines every element of Rd (read with EmitVectorExtractZx) with a constant built
  // from op.Immediate, via `emit`, and writes the result back to the same lane of Rd.
  public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdImm immOp = (OpCodeSimdImm)context.CurrOp;

      Operand immediate = Const(immOp.Immediate);

      Operand result = context.VectorZero();

      int size = immOp.Size;
      int laneCount = immOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand current = EmitVectorExtractZx(context, immOp.Rd, lane, size);
          Operand value = emit(current, immediate);

          result = EmitVectorInsert(context, result, value, lane, size);
      }

      context.Copy(GetVec(immOp.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorWidenRmBinaryOp with signed set to true.
  public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenRmBinaryOp(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorWidenRmBinaryOp with signed set to false.
  public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenRmBinaryOp(context, emit, signed: false);
  // Binary operation where only Rm is widened: Rn is read at the wide element size
  // (op.Size + 1), Rm at the narrow size (op.Size), and the result is inserted at the
  // wide size. For 128-bit register ops the Rm elements are taken starting at index
  // `elems` (the second group of narrow elements); otherwise starting at index 0.
  private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
  {
      OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int narrowSize = regOp.Size;
      int wideSize = narrowSize + 1;

      int laneCount = 8 >> narrowSize;
      int srcOffset = regOp.RegisterSize == RegisterSize.Simd128 ? laneCount : 0;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand lhs = EmitVectorExtract(context, regOp.Rn, lane, wideSize, signed);
          Operand rhs = EmitVectorExtract(context, regOp.Rm, srcOffset + lane, narrowSize, signed);

          Operand value = emit(lhs, rhs);

          result = EmitVectorInsert(context, result, value, lane, wideSize);
      }

      context.Copy(GetVec(regOp.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorWidenRnRmBinaryOp with signed set to true.
  public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorWidenRnRmBinaryOp with signed set to false.
  public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
  // Binary operation where both sources are widened: Rn and Rm are read at the narrow
  // element size (op.Size) and the result is inserted at the wide size (op.Size + 1).
  // For 128-bit register ops the source elements are taken starting at index `elems`;
  // otherwise starting at index 0.
  private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
  {
      OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int narrowSize = regOp.Size;
      int wideSize = narrowSize + 1;

      int laneCount = 8 >> narrowSize;
      int srcOffset = regOp.RegisterSize == RegisterSize.Simd128 ? laneCount : 0;

      for (int lane = 0; lane < laneCount; lane++)
      {
          int srcLane = srcOffset + lane;

          Operand lhs = EmitVectorExtract(context, regOp.Rn, srcLane, narrowSize, signed);
          Operand rhs = EmitVectorExtract(context, regOp.Rm, srcLane, narrowSize, signed);

          Operand value = emit(lhs, rhs);

          result = EmitVectorInsert(context, result, value, lane, wideSize);
      }

      context.Copy(GetVec(regOp.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorWidenRnRmTernaryOp with signed set to true.
  public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
      => EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorWidenRnRmTernaryOp with signed set to false.
  public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
      => EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
  // Ternary operation with widened sources: the current Rd element is read at the wide
  // size (op.Size + 1), Rn and Rm at the narrow size (op.Size), and the result is
  // inserted at the wide size. For 128-bit register ops the Rn/Rm elements are taken
  // starting at index `elems`; otherwise starting at index 0.
  private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
  {
      OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int narrowSize = regOp.Size;
      int wideSize = narrowSize + 1;

      int laneCount = 8 >> narrowSize;
      int srcOffset = regOp.RegisterSize == RegisterSize.Simd128 ? laneCount : 0;

      for (int lane = 0; lane < laneCount; lane++)
      {
          int srcLane = srcOffset + lane;

          Operand acc = EmitVectorExtract(context, regOp.Rd, lane, wideSize, signed);
          Operand lhs = EmitVectorExtract(context, regOp.Rn, srcLane, narrowSize, signed);
          Operand rhs = EmitVectorExtract(context, regOp.Rm, srcLane, narrowSize, signed);

          Operand value = emit(acc, lhs, rhs);

          result = EmitVectorInsert(context, result, value, lane, wideSize);
      }

      context.Copy(GetVec(regOp.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorWidenBinaryOpByElem with signed set to true.
  public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenBinaryOpByElem(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorWidenBinaryOpByElem with signed set to false.
  public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorWidenBinaryOpByElem(context, emit, signed: false);
  // Widening binary operation "by element": the Rm element selected by op.Index is read
  // once at the narrow size, then combined via `emit` with each narrow Rn element. The
  // result is inserted at the wide size (op.Size + 1). For 128-bit register ops the Rn
  // elements are taken starting at index `elems`; otherwise starting at index 0.
  private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
  {
      OpCodeSimdRegElem elemOp = (OpCodeSimdRegElem)context.CurrOp;

      Operand result = context.VectorZero();

      int narrowSize = elemOp.Size;
      int wideSize = narrowSize + 1;

      Operand selected = EmitVectorExtract(context, elemOp.Rm, elemOp.Index, narrowSize, signed);

      int laneCount = 8 >> narrowSize;
      int srcOffset = elemOp.RegisterSize == RegisterSize.Simd128 ? laneCount : 0;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand src = EmitVectorExtract(context, elemOp.Rn, srcOffset + lane, narrowSize, signed);
          Operand value = emit(src, selected);

          result = EmitVectorInsert(context, result, value, lane, wideSize);
      }

      context.Copy(GetVec(elemOp.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorWidenTernaryOpByElem with signed set to true.
  public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
      => EmitVectorWidenTernaryOpByElem(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorWidenTernaryOpByElem with signed set to false.
  public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
      => EmitVectorWidenTernaryOpByElem(context, emit, signed: false);
  // Widening ternary operation "by element": for every lane, `emit` receives the current
  // wide Rd element, the narrow Rn element and the Rm element selected by op.Index (read
  // once, at the narrow size). The result is inserted at the wide size (op.Size + 1).
  // For 128-bit register ops the Rn elements are taken starting at index `elems`.
  private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
  {
      OpCodeSimdRegElem elemOp = (OpCodeSimdRegElem)context.CurrOp;

      Operand result = context.VectorZero();

      int narrowSize = elemOp.Size;
      int wideSize = narrowSize + 1;

      Operand selected = EmitVectorExtract(context, elemOp.Rm, elemOp.Index, narrowSize, signed);

      int laneCount = 8 >> narrowSize;
      int srcOffset = elemOp.RegisterSize == RegisterSize.Simd128 ? laneCount : 0;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand acc = EmitVectorExtract(context, elemOp.Rd, lane, wideSize, signed);
          Operand src = EmitVectorExtract(context, elemOp.Rn, srcOffset + lane, narrowSize, signed);

          Operand value = emit(acc, src, selected);

          result = EmitVectorInsert(context, result, value, lane, wideSize);
      }

      context.Copy(GetVec(elemOp.Rd), result);
  }
  // Signed entry point: forwards to EmitVectorPairwiseOp with signed set to true.
  public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorPairwiseOp(context, emit, signed: true);
  // Unsigned entry point: forwards to EmitVectorPairwiseOp with signed set to false.
  public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorPairwiseOp(context, emit, signed: false);
  // Integer pairwise operation: adjacent element pairs of Rn are combined by `emit` into
  // result lanes [0, pairs), and adjacent pairs of Rm into lanes [pairs, 2 * pairs).
  private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
  {
      OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int size = regOp.Size;
      int pairCount = regOp.GetPairsCount() >> size;

      for (int pair = 0; pair < pairCount; pair++)
      {
          int even = pair * 2;
          int odd = even + 1;

          Operand nEven = EmitVectorExtract(context, regOp.Rn, even, size, signed);
          Operand nOdd = EmitVectorExtract(context, regOp.Rn, odd, size, signed);

          Operand mEven = EmitVectorExtract(context, regOp.Rm, even, size, signed);
          Operand mOdd = EmitVectorExtract(context, regOp.Rm, odd, size, signed);

          Operand nValue = emit(nEven, nOdd);
          result = EmitVectorInsert(context, result, nValue, pair, size);

          Operand mValue = emit(mEven, mOdd);
          result = EmitVectorInsert(context, result, mValue, pairCount + pair, size);
      }

      context.Copy(GetVec(regOp.Rd), result);
  }
  // Pairwise operation built from x86 intrinsics. Both sources are rearranged into a
  // "left" and a "right" vector, which are then combined with the intrinsic selected
  // from `inst` and written to Rd. Three layouts are handled:
  //   - 64-bit register ops: m:n is packed into one vector, then even/odd elements are
  //     gathered with PSHUFB (upper half zeroed through ZeroMask);
  //   - 128-bit ops with op.Size < 3: each source is split into odd:even halves with
  //     PSHUFB, then the halves are recombined with PUNPCKLQDQ / PUNPCKHQDQ;
  //   - remaining 128-bit ops: the sources are combined directly with PUNPCKLQDQ /
  //     PUNPCKHQDQ and inst[3] is used.
  public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst)
  {
      OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

      Operand srcN = GetVec(regOp.Rn);
      Operand srcM = GetVec(regOp.Rm);

      Operand lhs, rhs;
      int instIndex;

      if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          Operand evenMask = X86GetElements(context, ZeroMask, EvenMasks[regOp.Size]);
          Operand oddMask = X86GetElements(context, ZeroMask, OddMasks[regOp.Size]);

          Operand packed = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, srcN, srcM); // m:n

          lhs = context.AddIntrinsic(Intrinsic.X86Pshufb, packed, evenMask); // 0:even from m:n
          rhs = context.AddIntrinsic(Intrinsic.X86Pshufb, packed, oddMask); // 0:odd from m:n

          instIndex = regOp.Size;
      }
      else if (regOp.Size < 3)
      {
          Operand splitMask = X86GetElements(context, OddMasks[regOp.Size], EvenMasks[regOp.Size]);

          Operand splitN = context.AddIntrinsic(Intrinsic.X86Pshufb, srcN, splitMask); // odd:even from n
          Operand splitM = context.AddIntrinsic(Intrinsic.X86Pshufb, srcM, splitMask); // odd:even from m

          lhs = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, splitN, splitM);
          rhs = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, splitN, splitM);

          instIndex = regOp.Size;
      }
      else
      {
          lhs = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, srcN, srcM);
          rhs = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, srcN, srcM);

          instIndex = 3;
      }

      context.Copy(GetVec(regOp.Rd), context.AddIntrinsic(inst[instIndex], lhs, rhs));
  }
  // Signed, same-width entry point for EmitVectorAcrossVectorOp.
  public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
  // Unsigned, same-width entry point for EmitVectorAcrossVectorOp.
  public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
  // Signed, long (result one size step wider) entry point for EmitVectorAcrossVectorOp.
  public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
      => EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
  // Unsigned, long (result one size step wider) entry point for EmitVectorAcrossVectorOp.
  public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
      => EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
  // Reduces all elements of Rn into a single value by folding them left-to-right with
  // `emit`, starting from element 0. The reduced value is inserted into element 0 of a
  // zeroed vector, at size op.Size (or op.Size + 1 when isLong is set), and copied to Rd.
  private static void EmitVectorAcrossVectorOp(
      ArmEmitterContext context,
      Func2I emit,
      bool signed,
      bool isLong)
  {
      OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

      int srcSize = simdOp.Size;
      int laneCount = simdOp.GetBytesCount() >> srcSize;

      Operand folded = EmitVectorExtract(context, simdOp.Rn, 0, srcSize, signed);

      for (int lane = 1; lane < laneCount; lane++)
      {
          Operand next = EmitVectorExtract(context, simdOp.Rn, lane, srcSize, signed);

          folded = emit(folded, next);
      }

      int dstSize = srcSize;

      if (isLong)
      {
          dstSize++;
      }

      Operand zero = context.VectorZero();
      Operand packed = EmitVectorInsert(context, zero, folded, 0, dstSize);

      context.Copy(GetVec(simdOp.Rd), packed);
  }
  // Reduces the four FP32 elements of Rn into one value by folding them left-to-right
  // with `emit`, then stores it in element 0 of an otherwise zeroed Rd. Only the
  // single-precision, 128-bit form is supported (asserted below).
  public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

      Debug.Assert((simdOp.Size & 1) == 0 && simdOp.RegisterSize == RegisterSize.Simd128);

      const int LaneCount = 4;

      Operand folded = context.VectorExtract(OperandType.FP32, GetVec(simdOp.Rn), 0);

      for (int lane = 1; lane < LaneCount; lane++)
      {
          Operand next = context.VectorExtract(OperandType.FP32, GetVec(simdOp.Rn), lane);

          folded = emit(folded, next);
      }

      Operand zero = context.VectorZero();
      Operand packed = context.VectorInsert(zero, folded, 0);

      context.Copy(GetVec(simdOp.Rd), packed);
  }
  // SSE2 variant of the FP32 across-vector reduction. Each of the four source elements
  // is broadcast to a full vector with SHUFPS, the broadcasts are combined as
  // emit(emit(e0, e1), emit(e2, e3)), and the result is passed through
  // VectorZeroUpper96 before being copied to Rd. Only the single-precision, 128-bit
  // form is supported (asserted below).
  public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

      Debug.Assert((simdOp.Size & 1) == 0 && simdOp.RegisterSize == RegisterSize.Simd128);

      // SHUFPS immediates selecting the same source element for all four positions.
      const int BroadcastE0 = 0x00;
      const int BroadcastE1 = 0x55;
      const int BroadcastE2 = 0xAA;
      const int BroadcastE3 = 0xFF;

      Operand src = context.Copy(GetVec(simdOp.Rn));

      Operand e0 = context.AddIntrinsic(Intrinsic.X86Shufps, src, src, Const(BroadcastE0));
      Operand e1 = context.AddIntrinsic(Intrinsic.X86Shufps, src, src, Const(BroadcastE1));
      Operand e2 = context.AddIntrinsic(Intrinsic.X86Shufps, src, src, Const(BroadcastE2));
      Operand e3 = context.AddIntrinsic(Intrinsic.X86Shufps, src, src, Const(BroadcastE3));

      Operand low = emit(e0, e1);
      Operand high = emit(e2, e3);
      Operand folded = emit(low, high);

      context.Copy(GetVec(simdOp.Rd), context.VectorZeroUpper96(folded));
  }
  // Scalar FP pairwise operation: combines elements 0 and 1 of Rn with `emit` and stores
  // the result in element 0 of an otherwise zeroed Rd. Elements are FP64 when bit 0 of
  // op.Size is set, else FP32.
  public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

      bool isDouble = (simdOp.Size & 1) != 0;
      OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;

      Operand first = context.VectorExtract(elemType, GetVec(simdOp.Rn), 0);
      Operand second = context.VectorExtract(elemType, GetVec(simdOp.Rn), 1);

      Operand zero = context.VectorZero();
      Operand value = emit(first, second);

      Operand result = context.VectorInsert(zero, value, 0);

      context.Copy(GetVec(simdOp.Rd), result);
  }
  // SSE2 variant of the scalar FP pairwise operation. Two operand vectors are prepared so
  // that element 0 of the first holds Rn[0] and element 0 of the second holds Rn[1];
  // `emit` combines them and the result is copied to Rd.
  public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

      Operand src = GetVec(simdOp.Rn);

      Operand first, second;

      if ((simdOp.Size & 1) != 0)
      {
          // Double precision: move the low/high 64-bit halves against a zero vector.
          Operand zero = context.VectorZero();

          first = context.AddIntrinsic(Intrinsic.X86Movlhps, src, zero);
          second = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, src);
      }
      else
      {
          // Single precision: PSHUFD picks element 0 (or 1) for position 0 and element 2
          // for the remaining positions, taken from a copy passed through
          // VectorZeroUpper64.
          const int SelectE0 = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0;
          const int SelectE1 = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0;

          Operand lowOnly = context.VectorZeroUpper64(src);

          first = context.AddIntrinsic(Intrinsic.X86Pshufd, lowOnly, Const(SelectE0));
          second = context.AddIntrinsic(Intrinsic.X86Pshufd, lowOnly, Const(SelectE1));
      }

      context.Copy(GetVec(simdOp.Rd), emit(first, second));
  }
  // FP pairwise operation: adjacent element pairs of Rn are combined by `emit` into
  // result lanes [0, pairs), and adjacent pairs of Rm into lanes [pairs, 2 * pairs).
  // Elements are FP64 when bit 0 of op.Size is set, else FP32.
  public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

      Operand result = context.VectorZero();

      int fpSize = regOp.Size & 1;
      OperandType elemType = fpSize == 0 ? OperandType.FP32 : OperandType.FP64;
      int pairCount = regOp.GetPairsCount() >> (fpSize + 2);

      for (int pair = 0; pair < pairCount; pair++)
      {
          int even = pair * 2;
          int odd = even + 1;

          Operand nEven = context.VectorExtract(elemType, GetVec(regOp.Rn), even);
          Operand nOdd = context.VectorExtract(elemType, GetVec(regOp.Rn), odd);

          Operand mEven = context.VectorExtract(elemType, GetVec(regOp.Rm), even);
          Operand mOdd = context.VectorExtract(elemType, GetVec(regOp.Rm), odd);

          Operand nValue = emit(nEven, nOdd);
          result = context.VectorInsert(result, nValue, pair);

          Operand mValue = emit(mEven, mOdd);
          result = context.VectorInsert(result, mValue, pairCount + pair);
      }

      context.Copy(GetVec(regOp.Rd), result);
  }
  // SSE2 variant of the FP pairwise operation. Copies of Rn and Rm are rearranged into
  // two operand vectors, which `emit` combines into the value copied to Rd:
  //   - FP64: UNPCKLPD / UNPCKHPD of n and m;
  //   - FP32, 64-bit register: UNPCKLPS of n and m, then the low/high halves are split
  //     against a zero vector with MOVLHPS / MOVHLPS;
  //   - FP32, 128-bit register: SHUFPS gathering elements 0 and 2 (first operand) and
  //     elements 1 and 3 (second operand) of each source.
  public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
  {
      OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;

      Operand srcN = context.Copy(GetVec(regOp.Rn));
      Operand srcM = context.Copy(GetVec(regOp.Rm));

      Operand first, second;

      if ((regOp.Size & 1) != 0)
      {
          first = context.AddIntrinsic(Intrinsic.X86Unpcklpd, srcN, srcM);
          second = context.AddIntrinsic(Intrinsic.X86Unpckhpd, srcN, srcM);
      }
      else if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          Operand interleaved = context.AddIntrinsic(Intrinsic.X86Unpcklps, srcN, srcM);

          Operand zero = context.VectorZero();

          first = context.AddIntrinsic(Intrinsic.X86Movlhps, interleaved, zero);
          second = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, interleaved);
      }
      else
      {
          const int EvenElems = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
          const int OddElems = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;

          first = context.AddIntrinsic(Intrinsic.X86Shufps, srcN, srcM, Const(EvenElems));
          second = context.AddIntrinsic(Intrinsic.X86Shufps, srcN, srcM, Const(OddElems));
      }

      context.Copy(GetVec(regOp.Rd), emit(first, second));
  }
  // Comparison condition codes. The first group is labelled "Legacy Sse" and the second
  // "Vex" by the original author; each entry notes whether the comparison is
  // ordered/unordered and signaling/non-signaling.
  public enum CmpCondition
  {
      // Legacy Sse.
      Equal              = 0x00, // Ordered, non-signaling.
      LessThan           = 0x01, // Ordered, signaling.
      LessThanOrEqual    = 0x02, // Ordered, signaling.
      UnorderedQ         = 0x03, // Non-signaling.
      NotLessThan        = 0x05, // Unordered, signaling.
      NotLessThanOrEqual = 0x06, // Unordered, signaling.
      OrderedQ           = 0x07, // Non-signaling.

      // Vex.
      GreaterThanOrEqual = 0x0D, // Ordered, signaling.
      GreaterThan        = 0x0E, // Ordered, signaling.
      OrderedS           = 0x17  // Signaling.
  }
  // Bit flags selecting the behaviour of the saturating emit helpers
  // (EmitSaturatingUnaryOpSx / EmitSaturatingBinaryOp).
  [Flags]
  public enum SaturatingFlags
  {
      None       = 0x00,

      ByElem     = 0x01, // Second operand is a single element selected by index.
      Scalar     = 0x02, // Process one element instead of the whole vector.
      Signed     = 0x04, // Treat elements as signed.

      Add        = 0x08, // Saturating add of Rn and Rm.
      Sub        = 0x10, // Saturating subtract of Rn and Rm.

      Accumulate = 0x20  // Saturating add of Rn into Rd.
  }
  // Scalar, signed entry point for EmitSaturatingUnaryOpSx.
  public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
      => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed);
  // Vector, signed entry point for EmitSaturatingUnaryOpSx.
  public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
      => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Signed);
  // Saturating unary operation on elements of Rn read with EmitVectorExtractSx. The
  // raw result of `emit` is post-processed by EmitSignedSrcSatQ (element sizes 0..2) or
  // EmitUnarySignedSatQAbsOrNeg (size 3) before being inserted into Rd. Only the
  // Scalar flag is consulted here: when set, a single element is processed.
  public static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
  {
      OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

      Operand result = context.VectorZero();

      bool isScalar = flags.HasFlag(SaturatingFlags.Scalar);

      int size = simdOp.Size;
      int laneCount = isScalar ? 1 : simdOp.GetBytesCount() >> size;

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand src = EmitVectorExtractSx(context, simdOp.Rn, lane, size);
          Operand raw = emit(src);

          Operand saturated = size > 2
              ? EmitUnarySignedSatQAbsOrNeg(context, raw)
              : EmitSignedSrcSatQ(context, raw, size, signedDst: true);

          result = EmitVectorInsert(context, result, saturated, lane, size);
      }

      context.Copy(GetVec(simdOp.Rd), result);
  }
  // Scalar, signed entry point for EmitSaturatingBinaryOp; any extra `flags` are OR-ed in.
  public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
      => EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed | flags);
  // Scalar, unsigned entry point for EmitSaturatingBinaryOp. No emit callback is passed,
  // so `flags` is expected to select one of the built-in Add/Sub/Accumulate paths.
  public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
      => EmitSaturatingBinaryOp(context, null, SaturatingFlags.Scalar | flags);
  // Vector, signed entry point for EmitSaturatingBinaryOp; any extra `flags` are OR-ed in.
  public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
      => EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Signed | flags);
  // Vector, unsigned entry point for EmitSaturatingBinaryOp. No emit callback is passed,
  // so `flags` is expected to select one of the built-in Add/Sub/Accumulate paths.
  public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
      => EmitSaturatingBinaryOp(context, null, flags);
  // Vector, signed, by-element entry point for EmitSaturatingBinaryOp.
  public static void EmitVectorSaturatingBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
      => EmitSaturatingBinaryOp(context, emit, SaturatingFlags.ByElem | SaturatingFlags.Signed);
  // Saturating binary operation driver. `flags` selects one of three paths:
  //   - Add / Sub: Rn and Rm elements are added or subtracted. For element sizes 0..2
  //     the result is passed through EmitSignedSrcSatQ; for size 3 the dedicated
  //     EmitBinary{Signed,Unsigned}SatQ{Add,Sub} helpers are used. Add takes priority
  //     if both flags are set.
  //   - Accumulate: the Rn element (extracted with the opposite signedness) is added to
  //     the Rd element, saturating via EmitSignedSrcSatQ or the ...SatQAcc helpers.
  //   - Otherwise: `emit` combines the Rn element with either the matching Rm element
  //     or, with ByElem, the single Rm element selected by index; the result is passed
  //     through EmitSignedSrcSatQ. `emit` must be non-null on this path.
  // With Scalar set only element 0 is processed.
  public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
  {
      OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

      Operand result = context.VectorZero();

      bool isByElem = flags.HasFlag(SaturatingFlags.ByElem);
      bool isScalar = flags.HasFlag(SaturatingFlags.Scalar);
      bool isSigned = flags.HasFlag(SaturatingFlags.Signed);

      bool isAdd = flags.HasFlag(SaturatingFlags.Add);
      bool isSub = flags.HasFlag(SaturatingFlags.Sub);

      bool isAccumulate = flags.HasFlag(SaturatingFlags.Accumulate);

      int size = simdOp.Size;
      int laneCount = isScalar ? 1 : simdOp.GetBytesCount() >> size;

      if (isAdd || isSub)
      {
          for (int lane = 0; lane < laneCount; lane++)
          {
              Operand lhs = EmitVectorExtract(context, simdOp.Rn, lane, size, isSigned);
              Operand rhs = EmitVectorExtract(context, ((OpCodeSimdReg)simdOp).Rm, lane, size, isSigned);

              Operand saturated;

              if (size > 2)
              {
                  // 64-bit elements cannot be widened, so use the dedicated helpers.
                  if (isAdd)
                  {
                      saturated = isSigned
                          ? EmitBinarySignedSatQAdd(context, lhs, rhs)
                          : EmitBinaryUnsignedSatQAdd(context, lhs, rhs);
                  }
                  else
                  {
                      saturated = isSigned
                          ? EmitBinarySignedSatQSub(context, lhs, rhs)
                          : EmitBinaryUnsignedSatQSub(context, lhs, rhs);
                  }
              }
              else
              {
                  Operand wide = isAdd ? context.Add(lhs, rhs) : context.Subtract(lhs, rhs);

                  saturated = EmitSignedSrcSatQ(context, wide, size, signedDst: isSigned);
              }

              result = EmitVectorInsert(context, result, saturated, lane, size);
          }
      }
      else if (isAccumulate)
      {
          for (int lane = 0; lane < laneCount; lane++)
          {
              // The source is read with the opposite signedness of the destination.
              Operand addend = EmitVectorExtract(context, simdOp.Rn, lane, size, !isSigned);
              Operand current = EmitVectorExtract(context, simdOp.Rd, lane, size, isSigned);

              Operand saturated;

              if (size > 2)
              {
                  saturated = isSigned
                      ? EmitBinarySignedSatQAcc(context, addend, current)
                      : EmitBinaryUnsignedSatQAcc(context, addend, current);
              }
              else
              {
                  Operand wide = context.Add(addend, current);

                  saturated = EmitSignedSrcSatQ(context, wide, size, signedDst: isSigned);
              }

              result = EmitVectorInsert(context, result, saturated, lane, size);
          }
      }
      else
      {
          Operand rhs = default;

          if (isByElem)
          {
              // The selected Rm element is shared by every lane; read it once.
              OpCodeSimdRegElem elemOp = (OpCodeSimdRegElem)simdOp;

              rhs = EmitVectorExtract(context, elemOp.Rm, elemOp.Index, size, isSigned);
          }

          for (int lane = 0; lane < laneCount; lane++)
          {
              Operand lhs = EmitVectorExtract(context, simdOp.Rn, lane, size, isSigned);

              if (!isByElem)
              {
                  rhs = EmitVectorExtract(context, ((OpCodeSimdReg)simdOp).Rm, lane, size, isSigned);
              }

              Operand raw = emit(lhs, rhs);
              Operand saturated = EmitSignedSrcSatQ(context, raw, size, signedDst: isSigned);

              result = EmitVectorInsert(context, result, saturated, lane, size);
          }
      }

      context.Copy(GetVec(simdOp.Rd), result);
  }
  // Bit flags for EmitSaturatingNarrowOp, plus named combinations. In the combination
  // names, the first Sx/Zx refers to the source signedness and the second to the
  // destination signedness.
  [Flags]
  public enum SaturatingNarrowFlags
  {
      Scalar    = 0x1, // Process one element instead of the whole vector.
      SignedSrc = 0x2, // Source elements are signed.
      SignedDst = 0x4, // Destination elements are signed.

      ScalarSxSx = Scalar | SignedSrc | SignedDst,
      ScalarSxZx = Scalar | SignedSrc,
      ScalarZxZx = Scalar,

      VectorSxSx = SignedSrc | SignedDst,
      VectorSxZx = SignedSrc,
      VectorZxZx = 0x0
  }
  // Saturating narrow: each wide element of Rn (size op.Size + 1) is saturated to the
  // narrow size op.Size by EmitSignedSrcSatQ or EmitUnsignedSrcSatQ, depending on the
  // source signedness, and inserted into Rd. For non-scalar 128-bit register ops the
  // results go to the lanes starting at index `elems` and the existing contents of Rd
  // are kept as the starting value; otherwise the result starts from a zero vector.
  public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
  {
      OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

      bool isScalar = flags.HasFlag(SaturatingNarrowFlags.Scalar);
      bool isSignedSrc = flags.HasFlag(SaturatingNarrowFlags.SignedSrc);
      bool isSignedDst = flags.HasFlag(SaturatingNarrowFlags.SignedDst);

      int narrowSize = simdOp.Size;

      int laneCount = isScalar ? 1 : 8 >> narrowSize;
      int dstOffset = !isScalar && simdOp.RegisterSize == RegisterSize.Simd128 ? laneCount : 0;

      Operand dst = GetVec(simdOp.Rd);

      Operand result;

      if (dstOffset == 0)
      {
          result = context.VectorZero();
      }
      else
      {
          result = context.Copy(dst);
      }

      for (int lane = 0; lane < laneCount; lane++)
      {
          Operand wide = EmitVectorExtract(context, simdOp.Rn, lane, narrowSize + 1, isSignedSrc);

          Operand saturated;

          if (isSignedSrc)
          {
              saturated = EmitSignedSrcSatQ(context, wide, narrowSize, isSignedDst);
          }
          else
          {
              saturated = EmitUnsignedSrcSatQ(context, wide, narrowSize, isSignedDst);
          }

          result = EmitVectorInsert(context, result, saturated, dstOffset + lane, narrowSize);
      }

      context.Copy(dst, result);
  }
  // long SignedSignSatQ(long op, int size);
  // Emits code that selects a saturation value from the sign of `op` alone:
  //   op > 0  -> largest signed value of the element size, QC flag set;
  //   op < 0  -> smallest signed value of the element size, QC flag set;
  //   op == 0 -> 0, QC flag untouched.
  // `size` is the log2 of the element size in bytes (0..3); `op` must be an I64 operand.
  public static Operand EmitSignedSignSatQ(ArmEmitterContext context, Operand op, int size)
  {
      int bits = 8 << size;

      Debug.Assert(op.Type == OperandType.I64);
      Debug.Assert(bits == 8 || bits == 16 || bits == 32 || bits == 64);

      Operand notPositive = Label();
      Operand done = Label();

      long signBit = 1L << (bits - 1);

      Operand zero = Const(0L);
      Operand upperBound = Const(signBit - 1L);
      Operand lowerBound = Const(-signBit);

      Operand result = context.Copy(context.AllocateLocal(OperandType.I64), zero);

      // Positive input: saturate to the upper bound.
      context.BranchIf(notPositive, op, zero, Comparison.LessOrEqual);
      context.Copy(result, upperBound);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(done);

      // Negative input: saturate to the lower bound. Zero falls straight through.
      context.MarkLabel(notPositive);
      context.BranchIf(done, op, zero, Comparison.GreaterOrEqual);
      context.Copy(result, lowerBound);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(done);

      context.MarkLabel(done);

      return result;
  }
  // ulong UnsignedSignSatQ(ulong op, int size);
  // Emits code producing 0 when op (treated as an unsigned I64) is 0, and otherwise the
  // largest unsigned value of (8 << size) bits, setting the QC flag in that case.
  // Returns the local that holds the result.
  public static Operand EmitUnsignedSignSatQ(ArmEmitterContext context, Operand op, int size)
  {
      int bits = 8 << size;

      Debug.Assert(op.Type == OperandType.I64);
      Debug.Assert(bits == 8 || bits == 16 || bits == 32 || bits == 64);

      Operand lblDone = Label();

      Operand zero = Const(0UL);
      Operand upperBound = Const(ulong.MaxValue >> (64 - bits));

      Operand local = context.AllocateLocal(OperandType.I64);
      Operand result = context.Copy(local, zero);

      // Unsigned "<= 0" can only hold for 0, which keeps the initial result.
      context.BranchIf(lblDone, op, zero, Comparison.LessOrEqualUI);
      context.Copy(result, upperBound);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(lblDone);

      context.MarkLabel(lblDone);

      return result;
  }
  1257. // TSrc (16bit, 32bit, 64bit; signed) > TDst (8bit, 16bit, 32bit; signed, unsigned).
  1258. // long SignedSrcSignedDstSatQ(long op, int size); ulong SignedSrcUnsignedDstSatQ(long op, int size);
  // Emits code clamping the signed I64 value op to the range of a destination element of
  // (8 << sizeDst) bits. The range is signed or unsigned depending on signedDst.
  // The QC flag is set whenever the value had to be clamped.
  // Returns the local that holds the (possibly clamped) value.
  public static Operand EmitSignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
  {
      int dstBits = 8 << sizeDst;

      Debug.Assert(op.Type == OperandType.I64);
      Debug.Assert(dstBits == 8 || dstBits == 16 || dstBits == 32);

      Operand lblNotAbove = Label();
      Operand lblDone = Label();

      Operand upperBound;
      Operand lowerBound;

      if (signedDst)
      {
          upperBound = Const((1L << (dstBits - 1)) - 1L);
          lowerBound = Const(-(1L << (dstBits - 1)));
      }
      else
      {
          upperBound = Const((1UL << dstBits) - 1UL);
          lowerBound = Const(0UL);
      }

      Operand local = context.AllocateLocal(OperandType.I64);
      Operand result = context.Copy(local, op);

      // Above the upper bound: clamp to it.
      context.BranchIf(lblNotAbove, op, upperBound, Comparison.LessOrEqual);
      context.Copy(result, upperBound);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(lblDone);

      // Below the lower bound: clamp to it. Otherwise the value is kept as is.
      context.MarkLabel(lblNotAbove);
      context.BranchIf(lblDone, op, lowerBound, Comparison.GreaterOrEqual);
      context.Copy(result, lowerBound);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(lblDone);

      context.MarkLabel(lblDone);

      return result;
  }
  1281. // TSrc (16bit, 32bit, 64bit; unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
  1282. // long UnsignedSrcSignedDstSatQ(ulong op, int size); ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size);
  // Emits code clamping op, treated as an unsigned I64, to the largest value of a destination
  // element of (8 << sizeDst) bits. That maximum is signed or unsigned depending on signedDst.
  // The QC flag is set whenever the value had to be clamped.
  // Returns the local that holds the (possibly clamped) value.
  public static Operand EmitUnsignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
  {
      int dstBits = 8 << sizeDst;

      Debug.Assert(op.Type == OperandType.I64);
      Debug.Assert(dstBits == 8 || dstBits == 16 || dstBits == 32);

      Operand lblDone = Label();

      Operand upperBound;

      if (signedDst)
      {
          upperBound = Const((1L << (dstBits - 1)) - 1L);
      }
      else
      {
          upperBound = Const((1UL << dstBits) - 1UL);
      }

      Operand local = context.AllocateLocal(OperandType.I64);
      Operand result = context.Copy(local, op);

      // Only an upper bound is checked; the comparison is unsigned.
      context.BranchIf(lblDone, op, upperBound, Comparison.LessOrEqualUI);
      context.Copy(result, upperBound);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(lblDone);

      context.MarkLabel(lblDone);

      return result;
  }
  1298. // long UnarySignedSatQAbsOrNeg(long op);
  // Emits code that passes op (an I64) through unchanged unless it equals long.MinValue,
  // in which case the result is long.MaxValue and the QC flag is set.
  // Returns the local that holds the result.
  private static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
  {
      Debug.Assert(op.Type == OperandType.I64);

      Operand lblDone = Label();

      Operand longMin = Const(long.MinValue);
      Operand longMax = Const(long.MaxValue);

      Operand local = context.AllocateLocal(OperandType.I64);
      Operand result = context.Copy(local, op);

      // Any value other than long.MinValue needs no adjustment.
      context.BranchIf(lblDone, op, longMin, Comparison.NotEqual);
      context.Copy(result, longMax);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(lblDone);

      context.MarkLabel(lblDone);

      return result;
  }
  1313. // long BinarySignedSatQAdd(long op1, long op2);
  // Emits code for a saturating signed 64-bit addition of op1 and op2.
  // On signed overflow the result becomes long.MaxValue when op1 >= 0 and long.MinValue
  // otherwise, and the QC flag is set. Returns the local that holds the result.
  public static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
  {
      Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

      Operand lblDone = Label();

      Operand longMin = Const(long.MinValue);
      Operand longMax = Const(long.MaxValue);
      Operand zero = Const(0L);

      Operand sum = context.Add(op1, op2);

      Operand local = context.AllocateLocal(OperandType.I64);
      Operand result = context.Copy(local, sum);

      // The sign bit of (~(op1 ^ op2) & (op1 ^ sum)) is set only when both operands share
      // a sign and the sum's sign differs from it, i.e. when the addition overflowed.
      Operand sameSign = context.BitwiseNot(context.BitwiseExclusiveOr(op1, op2));
      Operand signChanged = context.BitwiseExclusiveOr(op1, sum);
      Operand overflow = context.BitwiseAnd(sameSign, signChanged);

      context.BranchIf(lblDone, overflow, zero, Comparison.GreaterOrEqual);

      Operand op1NonNegative = context.ICompareGreaterOrEqual(op1, zero);
      Operand saturated = context.ConditionalSelect(op1NonNegative, longMax, longMin);

      context.Copy(result, saturated);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(lblDone);

      context.MarkLabel(lblDone);

      return result;
  }
  1333. // ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2);
  // Emits code for a saturating unsigned 64-bit addition of op1 and op2.
  // When the sum wraps around (sum < op1, unsigned) the result becomes ulong.MaxValue and
  // the QC flag is set. Returns the local that holds the result.
  public static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
  {
      Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

      Operand lblDone = Label();

      Operand ulongMax = Const(ulong.MaxValue);

      Operand sum = context.Add(op1, op2);

      Operand local = context.AllocateLocal(OperandType.I64);
      Operand result = context.Copy(local, sum);

      // No wrap-around happened if the sum is still at least op1.
      context.BranchIf(lblDone, sum, op1, Comparison.GreaterOrEqualUI);
      context.Copy(result, ulongMax);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(lblDone);

      context.MarkLabel(lblDone);

      return result;
  }
  1348. // long BinarySignedSatQSub(long op1, long op2);
  // Emits code for a saturating signed 64-bit subtraction (op1 - op2).
  // On signed overflow the result becomes long.MaxValue when op1 >= 0 and long.MinValue
  // otherwise, and the QC flag is set. Returns the local that holds the result.
  public static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
  {
      Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

      Operand lblDone = Label();

      Operand longMin = Const(long.MinValue);
      Operand longMax = Const(long.MaxValue);
      Operand zero = Const(0L);

      Operand diff = context.Subtract(op1, op2);

      Operand local = context.AllocateLocal(OperandType.I64);
      Operand result = context.Copy(local, diff);

      // The sign bit of ((op1 ^ op2) & (op1 ^ diff)) is set only when the operands have
      // different signs and the difference's sign differs from op1's, i.e. on overflow.
      Operand signsDiffer = context.BitwiseExclusiveOr(op1, op2);
      Operand signChanged = context.BitwiseExclusiveOr(op1, diff);
      Operand overflow = context.BitwiseAnd(signsDiffer, signChanged);

      context.BranchIf(lblDone, overflow, zero, Comparison.GreaterOrEqual);

      Operand op1NonNegative = context.ICompareGreaterOrEqual(op1, zero);
      Operand saturated = context.ConditionalSelect(op1NonNegative, longMax, longMin);

      context.Copy(result, saturated);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(lblDone);

      context.MarkLabel(lblDone);

      return result;
  }
  1368. // ulong BinaryUnsignedSatQSub(ulong op1, ulong op2);
  // Emits code for a saturating unsigned 64-bit subtraction (op1 - op2).
  // When op1 < op2 (unsigned) the result becomes 0 and the QC flag is set.
  // Returns the local that holds the result.
  public static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
  {
      Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

      Operand lblDone = Label();

      Operand zero = Const(0L);

      Operand diff = context.Subtract(op1, op2);

      Operand local = context.AllocateLocal(OperandType.I64);
      Operand result = context.Copy(local, diff);

      // The difference is valid as long as op1 is not smaller than op2.
      context.BranchIf(lblDone, op1, op2, Comparison.GreaterOrEqualUI);
      context.Copy(result, zero);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(lblDone);

      context.MarkLabel(lblDone);

      return result;
  }
  1383. // long BinarySignedSatQAcc(ulong op1, long op2);
  // Emits code adding op1 (treated as unsigned) to op2 (treated as signed) with the result
  // saturated to long.MaxValue; the QC flag is set whenever saturation happens.
  // Returns the local that holds the result.
  private static Operand EmitBinarySignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
  {
      Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

      Operand lblOp1AboveMax = Label();
      Operand lblOp2Negative = Label();
      Operand lblDone = Label();

      Operand longMax = Const(long.MaxValue);
      Operand zero = Const(0L);

      Operand sum = context.Add(op1, op2);

      Operand local = context.AllocateLocal(OperandType.I64);
      Operand result = context.Copy(local, sum);

      // Shared tail of every saturating path: result = long.MaxValue, set QC, jump to the end.
      void SaturateToMax()
      {
          context.Copy(result, longMax);
          SetFpFlag(context, FPState.QcFlag, Const(1));
          context.Branch(lblDone);
      }

      context.BranchIf(lblOp1AboveMax, op1, longMax, Comparison.GreaterUI);

      // op1 <= long.MaxValue: saturate only if op2 is non-negative and the sum came out negative
      // (sign bit of (~op2 & sum) set).
      Operand notOp2AndSum = context.BitwiseAnd(context.BitwiseNot(op2), sum);
      context.BranchIf(lblDone, notOp2AndSum, zero, Comparison.GreaterOrEqual);
      SaturateToMax();

      // op1 > long.MaxValue and op2 >= 0: always saturate.
      context.MarkLabel(lblOp1AboveMax);
      context.BranchIf(lblOp2Negative, op2, zero, Comparison.Less);
      SaturateToMax();

      // op1 > long.MaxValue and op2 < 0: saturate unless the sum fits below long.MaxValue (unsigned).
      context.MarkLabel(lblOp2Negative);
      context.BranchIf(lblDone, sum, longMax, Comparison.LessOrEqualUI);
      SaturateToMax();

      context.MarkLabel(lblDone);

      return result;
  }
  1413. // ulong BinaryUnsignedSatQAcc(long op1, ulong op2);
  // Emits code adding op1 (treated as signed) to op2 (treated as unsigned) with the result
  // saturated to the range [0, ulong.MaxValue]; the QC flag is set whenever saturation happens.
  // Returns the local that holds the result.
  private static Operand EmitBinaryUnsignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
  {
      Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

      Operand lblOp1Negative = Label();
      Operand lblDone = Label();

      Operand ulongMax = Const(ulong.MaxValue);
      Operand longMax = Const(long.MaxValue);
      Operand zero = Const(0L);

      Operand sum = context.Add(op1, op2);

      Operand local = context.AllocateLocal(OperandType.I64);
      Operand result = context.Copy(local, sum);

      context.BranchIf(lblOp1Negative, op1, zero, Comparison.Less);

      // op1 >= 0: saturate to the maximum if the unsigned sum wrapped around.
      context.BranchIf(lblDone, sum, op1, Comparison.GreaterOrEqualUI);
      context.Copy(result, ulongMax);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(lblDone);

      // op1 < 0: the sum is kept when op2 > long.MaxValue (unsigned) or when the sum is
      // non-negative as a signed value; otherwise the result saturates to 0.
      context.MarkLabel(lblOp1Negative);
      context.BranchIf(lblDone, op2, longMax, Comparison.GreaterUI);
      context.BranchIf(lblDone, sum, zero, Comparison.GreaterOrEqual);
      context.Copy(result, zero);
      SetFpFlag(context, FPState.QcFlag, Const(1));
      context.Branch(lblDone);

      context.MarkLabel(lblDone);

      return result;
  }
  // Emits the x86 and-not intrinsic (Andnps for single, Andnpd for double) on value, using
  // a mask built from negative zero. The mask is replicated to all elements when vector is
  // true and built with X86GetScalar otherwise.
  public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
  {
      Operand negZeroMask;
      Intrinsic andNot;

      if (single)
      {
          andNot = Intrinsic.X86Andnps;
          negZeroMask = vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f);
      }
      else
      {
          andNot = Intrinsic.X86Andnpd;
          negZeroMask = vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d);
      }

      return context.AddIntrinsic(andNot, negZeroMask, value);
  }
  // Extracts element "index" of size "size" from vector register "reg", sign-extended.
  public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
      => EmitVectorExtract(context, reg, index, size, signed: true);
  // Extracts element "index" of size "size" from vector register "reg", zero-extended.
  public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
      => EmitVectorExtract(context, reg, index, size, signed: false);
  // Extracts element "index" from vector register "reg". size selects the element width
  // (0: 8 bits, 1: 16 bits, 2: 32 bits, 3: 64 bits). Elements narrower than 64 bits are
  // sign- or zero-extended to I64 depending on "signed"; 64-bit elements are returned as is.
  // Throws ArgumentOutOfRangeException (via ThrowIfInvalid) for an invalid index or size.
  public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
  {
      ThrowIfInvalid(index, size);

      Operand source = GetVec(reg);

      Operand element = default;

      switch (size)
      {
          case 0:
              element = context.VectorExtract8(source, index);
              element = signed
                  ? context.SignExtend8(OperandType.I64, element)
                  : context.ZeroExtend8(OperandType.I64, element);
              break;

          case 1:
              element = context.VectorExtract16(source, index);
              element = signed
                  ? context.SignExtend16(OperandType.I64, element)
                  : context.ZeroExtend16(OperandType.I64, element);
              break;

          case 2:
              element = context.VectorExtract(OperandType.I32, source, index);
              element = signed
                  ? context.SignExtend32(OperandType.I64, element)
                  : context.ZeroExtend32(OperandType.I64, element);
              break;

          case 3:
              // Already 64 bits wide: no extension needed.
              element = context.VectorExtract(OperandType.I64, source, index);
              break;
      }

      return element;
  }
  // Inserts "value" into element "index" of "vector" and returns the resulting vector.
  // size selects the element width (0: 8 bits, 1: 16 bits, 2 and 3: handled by the generic
  // VectorInsert). An I64 value is first converted to I32 when the element is narrower than 64 bits.
  // Throws ArgumentOutOfRangeException (via ThrowIfInvalid) for an invalid index or size.
  public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
  {
      ThrowIfInvalid(index, size);

      bool needsNarrowing = size < 3 && value.Type == OperandType.I64;

      if (needsNarrowing)
      {
          value = context.ConvertI64ToI32(value);
      }

      switch (size)
      {
          case 0:
              return context.VectorInsert8(vector, value, index);

          case 1:
              return context.VectorInsert16(vector, value, index);

          case 2:
          case 3:
              return context.VectorInsert(vector, value, index);

          default:
              return vector;
      }
  }
  // Validates an element size/index pair for a 16-byte vector.
  // size must be in [0, 3] and index must be in [0, (16 >> size) - 1].
  // Throws ArgumentOutOfRangeException naming the offending parameter; size is checked first.
  public static void ThrowIfInvalid(int index, int size)
  {
      // The unsigned casts make negative values fail the same comparison as too-large ones.
      bool sizeIsValid = (uint)size <= 3u;

      if (!sizeIsValid)
      {
          throw new ArgumentOutOfRangeException(nameof(size));
      }

      uint elementCount = 16u >> size;

      if ((uint)index >= elementCount)
      {
          throw new ArgumentOutOfRangeException(nameof(index));
      }
  }
  1525. }
  1526. }