InstEmitSimdShift.cs 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827
  1. // https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
  2. using ARMeilleure.Decoders;
  3. using ARMeilleure.IntermediateRepresentation;
  4. using ARMeilleure.Translation;
  5. using System;
  6. using System.Diagnostics;
  7. using System.Reflection;
  8. using static ARMeilleure.Instructions.InstEmitHelper;
  9. using static ARMeilleure.Instructions.InstEmitSimdHelper;
  10. using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  11. namespace ARMeilleure.Instructions
  12. {
  13. using Func2I = Func<Operand, Operand, Operand>;
  14. static partial class InstEmit
  15. {
#region "Masks"
// Per-element-size replication masks (index = log2 of element size in bytes:
// 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit). Each mask has the lowest
// bit of every element set, so multiplying by it broadcasts a per-element
// value. Presumably consumed by the EmitSli/EmitSri helpers (defined outside
// this view) — TODO confirm.
private static readonly long[] _masks_SliSri = new long[] // Replication masks.
{
    0x0101010101010101L, 0x0001000100010001L, 0x0000000100000001L, 0x0000000000000001L
};
#endregion
/// <summary>
/// Emits code for the RSHRN/RSHRN2 vector instruction
/// (rounding shift right narrow, by immediate).
/// </summary>
public static void Rshrn_V(ArmEmitterContext context)
{
    if (Optimizations.UseAdvSimd)
    {
        // AArch64 host: map directly onto the native RSHRN instruction.
        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

        int shift = GetImmShr(op);

        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64RshrnV, shift);
    }
    else if (Optimizations.UseSsse3)
    {
        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

        int shift = GetImmShr(op);

        // Rounding constant: half of the value range being discarded.
        long roundConst = 1L << (shift - 1);

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        Operand dLow = context.VectorZeroUpper64(d);

        Operand mask = default;

        // Broadcast the rounding constant across the wide (source) elements;
        // op.Size + 1 selects the source element size (1 = 16-bit, 2 = 32-bit,
        // 3 = 64-bit). The 16-bit case replicates the constant into both
        // halves of a 32-bit immediate.
        switch (op.Size + 1)
        {
            case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break;
            case 2: mask = X86GetAllElements(context, (int)roundConst); break;
            case 3: mask = X86GetAllElements(context, roundConst); break;
        }

        // res = (n + roundConst) >> shift, computed on the wide elements.
        Intrinsic addInst = X86PaddInstruction[op.Size + 1];

        Operand res = context.AddIntrinsic(addInst, n, mask);

        Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];

        res = context.AddIntrinsic(srlInst, res, Const(shift));

        // Pack: pshufb with the even-element mask keeps the low (narrow) half
        // of each wide element.
        Operand mask2 = X86GetAllElements(context, EvenMasks[op.Size]);

        res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2);

        // Simd128 => RSHRN2 form: place the narrowed data in the upper half,
        // preserving the destination's low 64 bits; Simd64 => RSHRN form.
        Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
            ? Intrinsic.X86Movlhps
            : Intrinsic.X86Movhlps;

        res = context.AddIntrinsic(movInst, dLow, res);

        context.Copy(d, res);
    }
    else
    {
        // Generic IR fallback: per-element narrowing shift with rounding.
        EmitVectorShrImmNarrowOpZx(context, round: true);
    }
}
  62. public static void Shl_S(ArmEmitterContext context)
  63. {
  64. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  65. int shift = GetImmShl(op);
  66. if (Optimizations.UseAdvSimd)
  67. {
  68. InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64ShlS, shift);
  69. }
  70. else
  71. {
  72. EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
  73. }
  74. }
/// <summary>
/// Emits code for the SHL vector instruction (shift left by immediate).
/// </summary>
public static void Shl_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShl(op);
    int eSize = 8 << op.Size; // Element size in bits.

    if (Optimizations.UseAdvSimd)
    {
        InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64ShlV, shift);
    }
    else if (shift >= eSize)
    {
        // NOTE(review): this path only clears the upper half of the 64-bit
        // form and leaves the element data untouched — presumably the
        // shift >= eSize encoding is not architecturally reachable for SHL
        // (immediate range is 0..esize-1); confirm intended semantics.
        if ((op.RegisterSize == RegisterSize.Simd64))
        {
            Operand res = context.VectorZeroUpper64(GetVec(op.Rd));

            context.Copy(GetVec(op.Rd), res);
        }
    }
    else if (Optimizations.UseGfni && op.Size == 0)
    {
        // Byte elements: x86 has no packed 8-bit shift, so use a GF(2^8)
        // affine transform whose bit matrix performs a per-byte left shift.
        Operand n = GetVec(op.Rn);

        ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);

        Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);

        Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else if (Optimizations.UseSse2 && op.Size > 0)
    {
        // 16/32/64-bit elements: a single packed shift does the job
        // (op.Size > 0 because there is no packed 8-bit shift on x86).
        Operand n = GetVec(op.Rn);

        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand res = context.AddIntrinsic(sllInst, n, Const(shift));

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Generic IR fallback: per-element left shift.
        EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
    }
}
/// <summary>
/// Emits code for the SHLL/SHLL2 vector instruction
/// (shift left long, by the element size).
/// </summary>
public static void Shll_V(ArmEmitterContext context)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    // SHLL always shifts by the full source element width.
    int shift = 8 << op.Size;

    if (Optimizations.UseAdvSimd)
    {
        InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ShllV);
    }
    else if (Optimizations.UseSse41)
    {
        Operand n = GetVec(op.Rn);

        if (op.RegisterSize == RegisterSize.Simd128)
        {
            // SHLL2 form: operate on the upper 64 bits of the source.
            n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
        }

        // Widen each element. Sign- vs zero-extension is immaterial here:
        // the subsequent shift by the full source width pushes the extended
        // bits out of the wide element anyway.
        Intrinsic movsxInst = X86PmovsxInstruction[op.Size];

        Operand res = context.AddIntrinsic(movsxInst, n);

        Intrinsic sllInst = X86PsllInstruction[op.Size + 1];

        res = context.AddIntrinsic(sllInst, res, Const(shift));

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Generic IR fallback: widen (zero-extended) then shift left.
        EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
    }
}
/// <summary>
/// Emits code for the SHRN/SHRN2 vector instruction
/// (shift right narrow, by immediate, no rounding).
/// </summary>
public static void Shrn_V(ArmEmitterContext context)
{
    if (Optimizations.UseAdvSimd)
    {
        // AArch64 host: use the native SHRN instruction.
        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

        int shift = GetImmShr(op);

        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64ShrnV, shift);
    }
    else if (Optimizations.UseSsse3)
    {
        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

        int shift = GetImmShr(op);

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        Operand dLow = context.VectorZeroUpper64(d);

        // Logical shift right on the wide elements (op.Size + 1 = source size).
        Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];

        Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));

        // Pack: keep the low (narrow) half of each wide element via pshufb.
        Operand mask = X86GetAllElements(context, EvenMasks[op.Size]);

        Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask);

        // Simd128 => SHRN2 form: narrowed data goes to the upper half,
        // preserving the destination's low 64 bits; Simd64 => SHRN form.
        Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
            ? Intrinsic.X86Movlhps
            : Intrinsic.X86Movhlps;

        res = context.AddIntrinsic(movInst, dLow, res);

        context.Copy(d, res);
    }
    else
    {
        // Generic IR fallback: per-element narrowing shift, no rounding.
        EmitVectorShrImmNarrowOpZx(context, round: false);
    }
}
  176. public static void Sli_S(ArmEmitterContext context)
  177. {
  178. if (Optimizations.UseAdvSimd)
  179. {
  180. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  181. int shift = GetImmShl(op);
  182. InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SliS, shift);
  183. }
  184. else
  185. {
  186. EmitSli(context, scalar: true);
  187. }
  188. }
  189. public static void Sli_V(ArmEmitterContext context)
  190. {
  191. if (Optimizations.UseAdvSimd)
  192. {
  193. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  194. int shift = GetImmShl(op);
  195. InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SliV, shift);
  196. }
  197. else
  198. {
  199. EmitSli(context, scalar: false);
  200. }
  201. }
  202. public static void Sqrshl_V(ArmEmitterContext context)
  203. {
  204. if (Optimizations.UseAdvSimd)
  205. {
  206. InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrshlV);
  207. }
  208. else
  209. {
  210. EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
  211. }
  212. }
  213. public static void Sqrshrn_S(ArmEmitterContext context)
  214. {
  215. if (Optimizations.UseAdvSimd)
  216. {
  217. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  218. int shift = GetImmShr(op);
  219. InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnS, shift);
  220. }
  221. else
  222. {
  223. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  224. }
  225. }
  226. public static void Sqrshrn_V(ArmEmitterContext context)
  227. {
  228. if (Optimizations.UseAdvSimd)
  229. {
  230. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  231. int shift = GetImmShr(op);
  232. InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnV, shift);
  233. }
  234. else
  235. {
  236. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  237. }
  238. }
  239. public static void Sqrshrun_S(ArmEmitterContext context)
  240. {
  241. if (Optimizations.UseAdvSimd)
  242. {
  243. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  244. int shift = GetImmShr(op);
  245. InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunS, shift);
  246. }
  247. else
  248. {
  249. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  250. }
  251. }
  252. public static void Sqrshrun_V(ArmEmitterContext context)
  253. {
  254. if (Optimizations.UseAdvSimd)
  255. {
  256. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  257. int shift = GetImmShr(op);
  258. InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunV, shift);
  259. }
  260. else
  261. {
  262. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  263. }
  264. }
  265. public static void Sqshl_V(ArmEmitterContext context)
  266. {
  267. if (Optimizations.UseAdvSimd)
  268. {
  269. InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqshlV);
  270. }
  271. else
  272. {
  273. EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
  274. }
  275. }
  276. public static void Sqshrn_S(ArmEmitterContext context)
  277. {
  278. if (Optimizations.UseAdvSimd)
  279. {
  280. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  281. int shift = GetImmShr(op);
  282. InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnS, shift);
  283. }
  284. else
  285. {
  286. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  287. }
  288. }
  289. public static void Sqshrn_V(ArmEmitterContext context)
  290. {
  291. if (Optimizations.UseAdvSimd)
  292. {
  293. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  294. int shift = GetImmShr(op);
  295. InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnV, shift);
  296. }
  297. else
  298. {
  299. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  300. }
  301. }
  302. public static void Sqshrun_S(ArmEmitterContext context)
  303. {
  304. if (Optimizations.UseAdvSimd)
  305. {
  306. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  307. int shift = GetImmShr(op);
  308. InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunS, shift);
  309. }
  310. else
  311. {
  312. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  313. }
  314. }
  315. public static void Sqshrun_V(ArmEmitterContext context)
  316. {
  317. if (Optimizations.UseAdvSimd)
  318. {
  319. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  320. int shift = GetImmShr(op);
  321. InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunV, shift);
  322. }
  323. else
  324. {
  325. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  326. }
  327. }
  328. public static void Sri_S(ArmEmitterContext context)
  329. {
  330. if (Optimizations.UseAdvSimd)
  331. {
  332. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  333. int shift = GetImmShr(op);
  334. InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SriS, shift);
  335. }
  336. else
  337. {
  338. EmitSri(context, scalar: true);
  339. }
  340. }
  341. public static void Sri_V(ArmEmitterContext context)
  342. {
  343. if (Optimizations.UseAdvSimd)
  344. {
  345. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  346. int shift = GetImmShr(op);
  347. InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SriV, shift);
  348. }
  349. else
  350. {
  351. EmitSri(context, scalar: false);
  352. }
  353. }
  354. public static void Srshl_V(ArmEmitterContext context)
  355. {
  356. if (Optimizations.UseAdvSimd)
  357. {
  358. InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrshlV);
  359. }
  360. else
  361. {
  362. EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
  363. }
  364. }
  365. public static void Srshr_S(ArmEmitterContext context)
  366. {
  367. if (Optimizations.UseAdvSimd)
  368. {
  369. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  370. int shift = GetImmShr(op);
  371. InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SrshrS, shift);
  372. }
  373. else
  374. {
  375. EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
  376. }
  377. }
/// <summary>
/// Emits code for SRSHR (vector): signed rounding shift right by immediate.
/// </summary>
public static void Srshr_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (Optimizations.UseAdvSimd)
    {
        int shift = GetImmShr(op);

        InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SrshrV, shift);
    }
    else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
    {
        // 16/32-bit elements only: x86/SSE2 has no packed 8-bit shifts and no
        // 64-bit arithmetic right shift, hence 0 < op.Size < 3.
        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        Operand n = GetVec(op.Rn);

        // Extract the rounding bit of each element, i.e. bit (shift - 1) of n
        // (the last bit shifted out): (n << (eSize - shift)) >> (eSize - 1).
        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

        Intrinsic srlInst = X86PsrlInstruction[op.Size];

        res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

        // Arithmetic shift of the source, then add the rounding bit.
        Intrinsic sraInst = X86PsraInstruction[op.Size];

        Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, nSra);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Generic IR fallback: signed per-element shift right with rounding.
        EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
    }
}
  410. public static void Srsra_S(ArmEmitterContext context)
  411. {
  412. if (Optimizations.UseAdvSimd)
  413. {
  414. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  415. int shift = GetImmShr(op);
  416. InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SrsraS, shift);
  417. }
  418. else
  419. {
  420. EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  421. }
  422. }
/// <summary>
/// Emits code for SRSRA (vector): signed rounding shift right and accumulate.
/// </summary>
public static void Srsra_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (Optimizations.UseAdvSimd)
    {
        int shift = GetImmShr(op);

        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SrsraV, shift);
    }
    else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
    {
        // 16/32-bit elements only: no packed 8-bit shifts and no 64-bit
        // arithmetic right shift in SSE2, hence 0 < op.Size < 3.
        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        // Rounding bit per element, i.e. bit (shift - 1) of n:
        // (n << (eSize - shift)) >> (eSize - 1).
        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

        Intrinsic srlInst = X86PsrlInstruction[op.Size];

        res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

        // Arithmetic shift, add the rounding bit, then accumulate into d.
        Intrinsic sraInst = X86PsraInstruction[op.Size];

        Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, nSra);
        res = context.AddIntrinsic(addInst, res, d);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else
    {
        // Generic IR fallback: rounding shift + accumulate.
        EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
    }
}
  457. public static void Sshl_S(ArmEmitterContext context)
  458. {
  459. if (Optimizations.UseAdvSimd)
  460. {
  461. InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SshlS);
  462. }
  463. else
  464. {
  465. EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
  466. }
  467. }
  468. public static void Sshl_V(ArmEmitterContext context)
  469. {
  470. if (Optimizations.UseAdvSimd)
  471. {
  472. InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SshlV);
  473. }
  474. else
  475. {
  476. EmitShlRegOp(context, ShlRegFlags.Signed);
  477. }
  478. }
/// <summary>
/// Emits code for SSHLL/SSHLL2: signed shift left long by immediate.
/// </summary>
public static void Sshll_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShl(op);

    if (Optimizations.UseAdvSimd)
    {
        InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshllV, shift);
    }
    else if (Optimizations.UseSse41)
    {
        Operand n = GetVec(op.Rn);

        if (op.RegisterSize == RegisterSize.Simd128)
        {
            // SSHLL2 form: operate on the upper 64 bits of the source.
            n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
        }

        // Sign-extend each element to twice the width (pmovsx).
        Intrinsic movsxInst = X86PmovsxInstruction[op.Size];

        Operand res = context.AddIntrinsic(movsxInst, n);

        // Shift may be zero (pure widening), in which case no shift is emitted.
        if (shift != 0)
        {
            Intrinsic sllInst = X86PsllInstruction[op.Size + 1];

            res = context.AddIntrinsic(sllInst, res, Const(shift));
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Generic IR fallback: sign-extended widen then shift left.
        EmitVectorShImmWidenBinarySx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
    }
}
  508. public static void Sshr_S(ArmEmitterContext context)
  509. {
  510. if (Optimizations.UseAdvSimd)
  511. {
  512. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  513. int shift = GetImmShr(op);
  514. InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SshrS, shift);
  515. }
  516. else
  517. {
  518. EmitShrImmOp(context, ShrImmFlags.ScalarSx);
  519. }
  520. }
/// <summary>
/// Emits code for SSHR (vector): signed shift right by immediate.
/// </summary>
public static void Sshr_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShr(op);

    if (Optimizations.UseAdvSimd)
    {
        InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshrV, shift);
    }
    else if (Optimizations.UseGfni && op.Size == 0)
    {
        // Byte elements: x86 has no packed 8-bit arithmetic shift, so build a
        // GF(2^8) affine bit matrix that performs the per-byte shift.
        Operand n = GetVec(op.Rn);

        ulong bitMatrix;

        if (shift < 8)
        {
            // Start from a logical right shift (negative logical-left amount)...
            bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);

            // Extend sign-bit
            // ...then OR in rows that copy the sign bit into the vacated
            // high positions.
            bitMatrix |= 0x8080808080808080UL >> (64 - shift * 8);
        }
        else
        {
            // shift == 8 (SSHR allows shifting by the full element width):
            // Replicate sign-bit into all bits
            bitMatrix = 0x8080808080808080UL;
        }

        Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);

        Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
    {
        // 16/32-bit elements: direct packed arithmetic shift. 64-bit is
        // excluded because SSE2 has no 64-bit arithmetic right shift.
        Operand n = GetVec(op.Rn);

        Intrinsic sraInst = X86PsraInstruction[op.Size];

        Operand res = context.AddIntrinsic(sraInst, n, Const(shift));

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Generic IR fallback: signed per-element shift right.
        EmitShrImmOp(context, ShrImmFlags.VectorSx);
    }
}
  568. public static void Ssra_S(ArmEmitterContext context)
  569. {
  570. if (Optimizations.UseAdvSimd)
  571. {
  572. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  573. int shift = GetImmShr(op);
  574. InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SsraS, shift);
  575. }
  576. else
  577. {
  578. EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
  579. }
  580. }
/// <summary>
/// Emits code for SSRA (vector): signed shift right and accumulate.
/// </summary>
public static void Ssra_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (Optimizations.UseAdvSimd)
    {
        int shift = GetImmShr(op);

        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SsraV, shift);
    }
    else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
    {
        // 16/32-bit elements: packed arithmetic shift, then accumulate.
        // (No 8-bit or 64-bit arithmetic shifts in SSE2.)
        int shift = GetImmShr(op);

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        Intrinsic sraInst = X86PsraInstruction[op.Size];

        Operand res = context.AddIntrinsic(sraInst, n, Const(shift));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, d);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else
    {
        // Generic IR fallback: signed shift + accumulate.
        EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
    }
}
  609. public static void Uqrshl_V(ArmEmitterContext context)
  610. {
  611. if (Optimizations.UseAdvSimd)
  612. {
  613. InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqrshlV);
  614. }
  615. else
  616. {
  617. EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
  618. }
  619. }
  620. public static void Uqrshrn_S(ArmEmitterContext context)
  621. {
  622. if (Optimizations.UseAdvSimd)
  623. {
  624. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  625. int shift = GetImmShr(op);
  626. InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnS, shift);
  627. }
  628. else
  629. {
  630. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  631. }
  632. }
  633. public static void Uqrshrn_V(ArmEmitterContext context)
  634. {
  635. if (Optimizations.UseAdvSimd)
  636. {
  637. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  638. int shift = GetImmShr(op);
  639. InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnV, shift);
  640. }
  641. else
  642. {
  643. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  644. }
  645. }
  646. public static void Uqshl_V(ArmEmitterContext context)
  647. {
  648. if (Optimizations.UseAdvSimd)
  649. {
  650. InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqshlV);
  651. }
  652. else
  653. {
  654. EmitShlRegOp(context, ShlRegFlags.Saturating);
  655. }
  656. }
  657. public static void Uqshrn_S(ArmEmitterContext context)
  658. {
  659. if (Optimizations.UseAdvSimd)
  660. {
  661. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  662. int shift = GetImmShr(op);
  663. InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnS, shift);
  664. }
  665. else
  666. {
  667. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  668. }
  669. }
  670. public static void Uqshrn_V(ArmEmitterContext context)
  671. {
  672. if (Optimizations.UseAdvSimd)
  673. {
  674. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  675. int shift = GetImmShr(op);
  676. InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnV, shift);
  677. }
  678. else
  679. {
  680. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  681. }
  682. }
  683. public static void Urshl_V(ArmEmitterContext context)
  684. {
  685. if (Optimizations.UseAdvSimd)
  686. {
  687. InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrshlV);
  688. }
  689. else
  690. {
  691. EmitShlRegOp(context, ShlRegFlags.Round);
  692. }
  693. }
  694. public static void Urshr_S(ArmEmitterContext context)
  695. {
  696. if (Optimizations.UseAdvSimd)
  697. {
  698. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  699. int shift = GetImmShr(op);
  700. InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UrshrS, shift);
  701. }
  702. else
  703. {
  704. EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
  705. }
  706. }
/// <summary>
/// Emits code for URSHR (vector): unsigned rounding shift right by immediate.
/// </summary>
public static void Urshr_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (Optimizations.UseAdvSimd)
    {
        int shift = GetImmShr(op);

        InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UrshrV, shift);
    }
    else if (Optimizations.UseSse2 && op.Size > 0)
    {
        // 16/32/64-bit elements (op.Size > 0 — no packed 8-bit shifts on x86;
        // 64-bit is fine here since only logical shifts are needed).
        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        Operand n = GetVec(op.Rn);

        // Rounding bit per element, i.e. bit (shift - 1) of n:
        // (n << (eSize - shift)) >> (eSize - 1).
        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

        Intrinsic srlInst = X86PsrlInstruction[op.Size];

        res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

        // Logical shift of the source, then add the rounding bit.
        Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));

        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, nSrl);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        // Generic IR fallback: unsigned per-element shift right with rounding.
        EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
    }
}
  738. public static void Ursra_S(ArmEmitterContext context)
  739. {
  740. if (Optimizations.UseAdvSimd)
  741. {
  742. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  743. int shift = GetImmShr(op);
  744. InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UrsraS, shift);
  745. }
  746. else
  747. {
  748. EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  749. }
  750. }
// URSRA (vector): unsigned rounding shift right by immediate and accumulate.
public static void Ursra_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (Optimizations.UseAdvSimd)
    {
        int shift = GetImmShr(op);

        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UrsraV, shift);
    }
    else if (Optimizations.UseSse2 && op.Size > 0)
    {
        int shift = GetImmShr(op);
        int eSize = 8 << op.Size;

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        // Shift left by (eSize - shift) to move the rounding bit
        // (bit shift - 1) into each element's MSB...
        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift));

        // ...then isolate it as 0 or 1.
        Intrinsic srlInst = X86PsrlInstruction[op.Size];

        res = context.AddIntrinsic(srlInst, res, Const(eSize - 1));

        Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift));

        // res = (n >>> shift) + roundingBit + d
        Intrinsic addInst = X86PaddInstruction[op.Size];

        res = context.AddIntrinsic(addInst, res, nSrl);

        res = context.AddIntrinsic(addInst, res, d);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else
    {
        EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
    }
}
  784. public static void Ushl_S(ArmEmitterContext context)
  785. {
  786. if (Optimizations.UseAdvSimd)
  787. {
  788. InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64UshlS);
  789. }
  790. else
  791. {
  792. EmitShlRegOp(context, ShlRegFlags.Scalar);
  793. }
  794. }
  795. public static void Ushl_V(ArmEmitterContext context)
  796. {
  797. if (Optimizations.UseAdvSimd)
  798. {
  799. InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UshlV);
  800. }
  801. else
  802. {
  803. EmitShlRegOp(context, ShlRegFlags.None);
  804. }
  805. }
// USHLL/UXTL (vector): widen each element to double width (zero-extend),
// then shift left by the immediate.
public static void Ushll_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShl(op);

    if (Optimizations.UseAdvSimd)
    {
        InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshllV, shift);
    }
    else if (Optimizations.UseSse41)
    {
        Operand n = GetVec(op.Rn);

        if (op.RegisterSize == RegisterSize.Simd128)
        {
            // "2" form: the source elements are the upper 64 bits of Rn,
            // so move them down before widening.
            n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
        }

        // Zero-extend each element to double width.
        Intrinsic movzxInst = X86PmovzxInstruction[op.Size];

        Operand res = context.AddIntrinsic(movzxInst, n);

        if (shift != 0)
        {
            // Shift in the widened element size (op.Size + 1).
            Intrinsic sllInst = X86PsllInstruction[op.Size + 1];

            res = context.AddIntrinsic(sllInst, res, Const(shift));
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
    }
}
  835. public static void Ushr_S(ArmEmitterContext context)
  836. {
  837. if (Optimizations.UseAdvSimd)
  838. {
  839. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  840. int shift = GetImmShr(op);
  841. InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UshrS, shift);
  842. }
  843. else
  844. {
  845. EmitShrImmOp(context, ShrImmFlags.ScalarZx);
  846. }
  847. }
  848. public static void Ushr_V(ArmEmitterContext context)
  849. {
  850. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  851. if (Optimizations.UseAdvSimd)
  852. {
  853. int shift = GetImmShr(op);
  854. InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, shift);
  855. }
  856. else if (Optimizations.UseSse2 && op.Size > 0)
  857. {
  858. int shift = GetImmShr(op);
  859. Operand n = GetVec(op.Rn);
  860. Intrinsic srlInst = X86PsrlInstruction[op.Size];
  861. Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
  862. if (op.RegisterSize == RegisterSize.Simd64)
  863. {
  864. res = context.VectorZeroUpper64(res);
  865. }
  866. context.Copy(GetVec(op.Rd), res);
  867. }
  868. else
  869. {
  870. EmitShrImmOp(context, ShrImmFlags.VectorZx);
  871. }
  872. }
  873. public static void Usra_S(ArmEmitterContext context)
  874. {
  875. if (Optimizations.UseAdvSimd)
  876. {
  877. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  878. int shift = GetImmShr(op);
  879. InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UsraS, shift);
  880. }
  881. else
  882. {
  883. EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
  884. }
  885. }
  886. public static void Usra_V(ArmEmitterContext context)
  887. {
  888. OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
  889. if (Optimizations.UseAdvSimd)
  890. {
  891. int shift = GetImmShr(op);
  892. InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UsraV, shift);
  893. }
  894. else if (Optimizations.UseSse2 && op.Size > 0)
  895. {
  896. int shift = GetImmShr(op);
  897. Operand d = GetVec(op.Rd);
  898. Operand n = GetVec(op.Rn);
  899. Intrinsic srlInst = X86PsrlInstruction[op.Size];
  900. Operand res = context.AddIntrinsic(srlInst, n, Const(shift));
  901. Intrinsic addInst = X86PaddInstruction[op.Size];
  902. res = context.AddIntrinsic(addInst, res, d);
  903. if (op.RegisterSize == RegisterSize.Simd64)
  904. {
  905. res = context.VectorZeroUpper64(res);
  906. }
  907. context.Copy(d, res);
  908. }
  909. else
  910. {
  911. EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
  912. }
  913. }
// Option flags for the immediate shift-right emitters below.
[Flags]
private enum ShrImmFlags
{
    Scalar = 1 << 0,     // Operate on a single element only.
    Signed = 1 << 1,     // Use arithmetic (sign-extending) shifts.
    Round = 1 << 2,      // Add the rounding constant before shifting.
    Accumulate = 1 << 3, // Add the shifted result to the destination element.

    ScalarSx = Scalar | Signed,
    ScalarZx = Scalar,

    VectorSx = Signed,
    VectorZx = 0
}
  926. private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
  927. {
  928. EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
  929. }
  930. private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
  931. {
  932. EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
  933. }
  934. private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
  935. {
  936. EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
  937. }
  938. private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
  939. {
  940. EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
  941. }
// Generic element-wise shift-right-by-immediate emitter shared by the
// SSHR/USHR/SRSHR/URSHR/SSRA/USRA/SRSRA/URSRA fallbacks (scalar and vector).
private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    Operand res = context.VectorZero();

    bool scalar = (flags & ShrImmFlags.Scalar) != 0;
    bool signed = (flags & ShrImmFlags.Signed) != 0;
    bool round = (flags & ShrImmFlags.Round) != 0;
    bool accumulate = (flags & ShrImmFlags.Accumulate) != 0;

    int shift = GetImmShr(op);

    // Half the weight of the last bit shifted out (shift >= 1 for these encodings).
    long roundConst = 1L << (shift - 1);

    int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

    for (int index = 0; index < elems; index++)
    {
        Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed);

        if (op.Size <= 2)
        {
            // Elements up to 32 bits have headroom in the 64-bit IR value,
            // so the round-add cannot overflow and plain shifts are exact.
            if (round)
            {
                e = context.Add(e, Const(roundConst));
            }

            e = signed ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
        }
        else /* if (op.Size == 3) */
        {
            // 64-bit elements need the overflow-aware helper.
            e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift);
        }

        if (accumulate)
        {
            Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed);

            e = context.Add(e, de);
        }

        res = EmitVectorInsert(context, res, e, index, op.Size);
    }

    context.Copy(GetVec(op.Rd), res);
}
// SHRN/RSHRN fallback: shift double-width source elements right by an
// immediate and narrow the results into half of Rd.
private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShr(op);
    long roundConst = 1L << (shift - 1);

    int elems = 8 >> op.Size;

    // Simd128 encodes the second-part form: results land in the upper half
    // of Rd, whose lower half must be preserved.
    int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

    Operand d = GetVec(op.Rd);

    Operand res = part == 0 ? context.VectorZero() : context.Copy(d);

    for (int index = 0; index < elems; index++)
    {
        // Source elements are double-width (op.Size + 1).
        Operand e = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);

        if (round)
        {
            e = context.Add(e, Const(roundConst));
        }

        e = context.ShiftRightUI(e, Const(shift));

        res = EmitVectorInsert(context, res, e, part + index, op.Size);
    }

    context.Copy(d, res);
}
// Option flags for the saturating narrowing shift-right emitters. Source and
// destination signedness are independent (e.g. SQSHRUN reads signed elements
// but saturates to an unsigned destination range).
[Flags]
private enum ShrImmSaturatingNarrowFlags
{
    Scalar = 1 << 0,    // Operate on a single element only.
    SignedSrc = 1 << 1, // Source elements are signed.
    SignedDst = 1 << 2, // Destination elements are signed.

    Round = 1 << 3,     // Add the rounding constant before shifting.

    ScalarSxSx = Scalar | SignedSrc | SignedDst,
    ScalarSxZx = Scalar | SignedSrc,
    ScalarZxZx = Scalar,

    VectorSxSx = SignedSrc | SignedDst,
    VectorSxZx = SignedSrc,
    VectorZxZx = 0
}
  1012. private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
  1013. {
  1014. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
  1015. }
// SQSHRN/UQSHRN/SQSHRUN (and rounding forms) fallback: shift double-width
// source elements right by an immediate, saturate to the destination range,
// and narrow into half of Rd.
private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
    bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
    bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
    bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;

    int shift = GetImmShr(op);
    long roundConst = 1L << (shift - 1);

    int elems = !scalar ? 8 >> op.Size : 1;

    // Vector Simd128 forms write the upper half of Rd ("2" variants),
    // preserving its lower half.
    int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;

    Operand d = GetVec(op.Rd);

    Operand res = part == 0 ? context.VectorZero() : context.Copy(d);

    for (int index = 0; index < elems; index++)
    {
        // Source elements are double-width (op.Size + 1).
        Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);

        if (op.Size <= 1 || !round)
        {
            // Up to 32-bit sources (or no rounding): the round-add cannot
            // overflow the 64-bit IR value, so plain shifts are exact.
            if (round)
            {
                e = context.Add(e, Const(roundConst));
            }

            e = signedSrc ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
        }
        else /* if (op.Size == 2 && round) */
        {
            // 64-bit rounded source: use the overflow-aware helper.
            e = EmitShrImm64(context, e, signedSrc, roundConst, shift); // shift <= 32
        }

        // Saturate to the destination element range before narrowing.
        e = signedSrc ? EmitSignedSrcSatQ(context, e, op.Size, signedDst) : EmitUnsignedSrcSatQ(context, e, op.Size, signedDst);

        res = EmitVectorInsert(context, res, e, part + index, op.Size);
    }

    context.Copy(d, res);
}
  1049. // dst64 = (Int(src64, signed) + roundConst) >> shift;
  1050. private static Operand EmitShrImm64(
  1051. ArmEmitterContext context,
  1052. Operand value,
  1053. bool signed,
  1054. long roundConst,
  1055. int shift)
  1056. {
  1057. MethodInfo info = signed
  1058. ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64))
  1059. : typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64));
  1060. return context.Call(info, value, Const(roundConst), Const(shift));
  1061. }
  1062. private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm)
  1063. {
  1064. EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true);
  1065. }
  1066. private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm)
  1067. {
  1068. EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false);
  1069. }
// Widening binary op with an immediate: each source element (from the lower
// or upper half of Rn depending on RegisterSize) is extended to double width
// and combined with the immediate via `emit`.
private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand res = context.VectorZero();

    int elems = 8 >> op.Size;

    // Simd128 forms read the upper half of Rn (the "2" variants).
    int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

    for (int index = 0; index < elems; index++)
    {
        Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);

        // Insert at the widened element size (op.Size + 1).
        res = EmitVectorInsert(context, res, emit(ne, Const(imm)), index, op.Size + 1);
    }

    context.Copy(GetVec(op.Rd), res);
}
// SLI: shift left and insert. Each Rd element receives (Rn.elem << shift)
// with the low `shift` bits of the original Rd element preserved.
private static void EmitSli(ArmEmitterContext context, bool scalar)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShl(op);
    int eSize = 8 << op.Size;

    // Mask of destination bits that are kept: ones in the low `shift` bits.
    ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL;

    if (shift >= eSize)
    {
        // Defensive case: nothing of Rn would be inserted. Rd is left as-is
        // apart from clearing the upper 64 bits for 64-bit/scalar forms.
        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            Operand res = context.VectorZeroUpper64(GetVec(op.Rd));

            context.Copy(GetVec(op.Rd), res);
        }
    }
    else if (Optimizations.UseGfni && op.Size == 0)
    {
        // Byte elements: expresses the per-byte left shift as a GF(2^8)
        // affine transform bit matrix (x86 has no packed byte shift).
        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift);

        Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);

        Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));

        // _masks_SliSri (declared elsewhere in this class) replicates the
        // per-element mask across the 64-bit lane.
        Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);

        Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);

        Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);

        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else if (Optimizations.UseSse2 && op.Size > 0)
    {
        // 16/32/64-bit elements: packed shift + mask + or.
        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        Intrinsic sllInst = X86PsllInstruction[op.Size];

        Operand nShifted = context.AddIntrinsic(sllInst, n, Const(shift));

        Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);

        Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);

        Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);

        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else
    {
        // Element-by-element fallback.
        Operand res = context.VectorZero();

        int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

        for (int index = 0; index < elems; index++)
        {
            Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

            Operand neShifted = context.ShiftLeft(ne, Const(shift));

            Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);

            Operand deMasked = context.BitwiseAnd(de, Const(mask));

            Operand e = context.BitwiseOr(neShifted, deMasked);

            res = EmitVectorInsert(context, res, e, index, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
// SRI: shift right and insert. Each Rd element receives (Rn.elem >>> shift)
// with the high `shift` bits of the original Rd element preserved.
private static void EmitSri(ArmEmitterContext context, bool scalar)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shift = GetImmShr(op);
    int eSize = 8 << op.Size;

    // Mask of destination bits that are kept: the top `shift` bits of the
    // element (ones from bit eSize - shift up, clipped to eSize bits).
    ulong mask = (ulong.MaxValue << (eSize - shift)) & (ulong.MaxValue >> (64 - eSize));

    if (shift >= eSize)
    {
        // Defensive case: nothing of Rn would be inserted. Rd is left as-is
        // apart from clearing the upper 64 bits for 64-bit/scalar forms.
        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            Operand res = context.VectorZeroUpper64(GetVec(op.Rd));

            context.Copy(GetVec(op.Rd), res);
        }
    }
    else if (Optimizations.UseGfni && op.Size == 0)
    {
        // Byte elements: a negative amount in the affine bit matrix encodes
        // a per-byte right shift (x86 has no packed byte shift).
        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift);

        Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix);

        Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0));

        // _masks_SliSri (declared elsewhere in this class) replicates the
        // per-element mask across the 64-bit lane.
        Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);

        Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);

        Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);

        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else if (Optimizations.UseSse2 && op.Size > 0)
    {
        // 16/32/64-bit elements: packed shift + mask + or.
        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        Intrinsic srlInst = X86PsrlInstruction[op.Size];

        Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));

        Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);

        Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);

        Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);

        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(d, res);
    }
    else
    {
        // Element-by-element fallback.
        Operand res = context.VectorZero();

        int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

        for (int index = 0; index < elems; index++)
        {
            Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

            // shift can be 64 for 64-bit elements, where an IR shift would
            // be undefined; the result is then simply zero.
            Operand neShifted = shift != 64 ? context.ShiftRightUI(ne, Const(shift)) : Const(0UL);

            Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);

            Operand deMasked = context.BitwiseAnd(de, Const(mask));

            Operand e = context.BitwiseOr(neShifted, deMasked);

            res = EmitVectorInsert(context, res, e, index, op.Size);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
// Option flags for the register-based shift emitters below.
[Flags]
private enum ShlRegFlags
{
    None = 0,
    Scalar = 1 << 0,     // Operate on a single element only.
    Signed = 1 << 1,     // Treat elements as signed.
    Round = 1 << 2,      // Round right shifts to nearest.
    Saturating = 1 << 3  // Saturate left shifts that overflow.
}
// Generic register-shift emitter: shifts each Rn element left by the signed
// low byte of the corresponding Rm element (negative amounts shift right),
// optionally rounding and/or saturating.
private static void EmitShlRegOp(ArmEmitterContext context, ShlRegFlags flags = ShlRegFlags.None)
{
    bool scalar = flags.HasFlag(ShlRegFlags.Scalar);
    bool signed = flags.HasFlag(ShlRegFlags.Signed);
    bool round = flags.HasFlag(ShlRegFlags.Round);
    bool saturating = flags.HasFlag(ShlRegFlags.Saturating);

    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand res = context.VectorZero();

    int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

    for (int index = 0; index < elems; index++)
    {
        Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed);

        // Shift amount: the sign-extended low byte of Rm's element
        // (byte index = element index scaled by the element size).
        Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, size: 0);

        Operand e = !saturating
            ? EmitShlReg(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed)
            : EmitShlRegSatQ(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed);

        res = EmitVectorInsert(context, res, e, index, op.Size);
    }

    context.Copy(GetVec(op.Rd), res);
}
// long SignedShlReg(long op, int shiftLsB, bool round, int size);
// ulong UnsignedShlReg(ulong op, int shiftLsB, bool round, int size);
//
// Register-based shift: negative shiftLsB shifts right (optionally rounding),
// positive shifts left (amounts >= eSize give zero), zero is a no-op.
private static Operand EmitShlReg(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed)
{
    int eSize = 8 << size;

    Debug.Assert(op.Type == OperandType.I64);
    Debug.Assert(shiftLsB.Type == OperandType.I32);
    Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);

    Operand lbl1 = Label();
    Operand lblEnd = Label();

    Operand eSizeOp = Const(eSize);
    Operand zero = Const(0);
    Operand zeroL = Const(0L);

    // Default result: the unmodified input (covers shiftLsB == 0).
    Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);

    context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual);

    // shiftLsB < 0: shift right by the negated amount.
    context.Copy(res, signed
        ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)
        : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize));

    context.Branch(lblEnd);

    context.MarkLabel(lbl1);

    // shiftLsB == 0: keep the default result.
    context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual);

    // shiftLsB > 0: shift left; select zero when the amount reaches eSize
    // (IR/native shifts would otherwise wrap the amount).
    Operand shl = context.ShiftLeft(op, shiftLsB);

    Operand isGreaterOrEqual = context.ICompareGreaterOrEqual(shiftLsB, eSizeOp);

    context.Copy(res, context.ConditionalSelect(isGreaterOrEqual, zeroL, shl));

    context.Branch(lblEnd);

    context.MarkLabel(lblEnd);

    return res;
}
// long SignedShlRegSatQ(long op, int shiftLsB, bool round, int size);
// ulong UnsignedShlRegSatQ(ulong op, int shiftLsB, bool round, int size);
//
// Saturating register-based shift: like EmitShlReg, but left shifts that
// overflow the element range saturate via the *SatQ helpers (which are
// declared elsewhere; presumably they also update FPSR.QC — see callers).
private static Operand EmitShlRegSatQ(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed)
{
    int eSize = 8 << size;

    Debug.Assert(op.Type == OperandType.I64);
    Debug.Assert(shiftLsB.Type == OperandType.I32);
    Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);

    Operand lbl1 = Label();
    Operand lbl2 = Label();
    Operand lblEnd = Label();

    Operand eSizeOp = Const(eSize);
    Operand zero = Const(0);

    // Default result: the unmodified input (covers shiftLsB == 0).
    Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);

    context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual);

    // shiftLsB < 0: shift right (never saturates).
    context.Copy(res, signed
        ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)
        : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize));

    context.Branch(lblEnd);

    context.MarkLabel(lbl1);

    // shiftLsB == 0: keep the default result.
    context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual);

    context.BranchIf(lbl2, shiftLsB, eSizeOp, Comparison.Less);

    // shiftLsB >= eSize: saturate based on the sign of the input.
    context.Copy(res, signed
        ? EmitSignedSignSatQ(context, op, size)
        : EmitUnsignedSignSatQ(context, op, size));

    context.Branch(lblEnd);

    context.MarkLabel(lbl2);

    // 0 < shiftLsB < eSize: shift left, saturating on overflow.
    Operand shl = context.ShiftLeft(op, shiftLsB);

    if (eSize == 64)
    {
        // 64-bit elements: detect overflow by shifting back and comparing
        // with the original value.
        Operand sarOrShr = signed
            ? context.ShiftRightSI(shl, shiftLsB)
            : context.ShiftRightUI(shl, shiftLsB);

        context.Copy(res, shl);

        context.BranchIf(lblEnd, sarOrShr, op, Comparison.Equal);

        context.Copy(res, signed
            ? EmitSignedSignSatQ(context, op, size)
            : EmitUnsignedSignSatQ(context, op, size));
    }
    else
    {
        // Narrower elements: saturate the (headroom-preserving) 64-bit
        // shift result back down to the element range.
        context.Copy(res, signed
            ? EmitSignedSrcSatQ(context, shl, size, signedDst: true)
            : EmitUnsignedSrcSatQ(context, shl, size, signedDst: false));
    }

    context.Branch(lblEnd);

    context.MarkLabel(lblEnd);

    return res;
}
// shift := [1, 128]; eSize := {8, 16, 32, 64}.
//
// long SignedShrReg(long op, int shift, bool round, int eSize);
//
// Arithmetic shift right by a runtime amount, optionally rounded to nearest.
private static Operand EmitSignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize)
{
    if (round)
    {
        Operand lblEnd = Label();

        Operand eSizeOp = Const(eSize);
        Operand zeroL = Const(0L);
        Operand one = Const(1);
        Operand oneL = Const(1L);

        // shift >= eSize: the rounded result is 0.
        Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL);

        context.BranchIf(lblEnd, shift, eSizeOp, Comparison.GreaterOrEqual);

        // Round to nearest: add half of the last bit shifted out.
        Operand roundConst = context.ShiftLeft(oneL, context.Subtract(shift, one));

        Operand add = context.Add(op, roundConst);

        Operand sar = context.ShiftRightSI(add, shift);

        if (eSize == 64)
        {
            // 64-bit elements: op + roundConst may overflow the signed
            // range; when it does, the logical shift of the wrapped sum
            // yields the bits of the conceptual 65-bit result instead.
            Operand shr = context.ShiftRightUI(add, shift);

            Operand left = context.BitwiseAnd(context.Negate(op), context.BitwiseExclusiveOr(op, add));

            Operand isLess = context.ICompareLess(left, zeroL);

            context.Copy(res, context.ConditionalSelect(isLess, shr, sar));
        }
        else
        {
            context.Copy(res, sar);
        }

        context.Branch(lblEnd);

        context.MarkLabel(lblEnd);

        return res;
    }
    else
    {
        Operand lblEnd = Label();

        Operand eSizeOp = Const(eSize);
        Operand zeroL = Const(0L);
        Operand negOneL = Const(-1L);

        Operand sar = context.ShiftRightSI(op, shift);

        Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sar);

        context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less);

        // shift >= eSize: the result is all sign bits (0 or -1).
        Operand isLess = context.ICompareLess(op, zeroL);

        context.Copy(res, context.ConditionalSelect(isLess, negOneL, zeroL));

        context.Branch(lblEnd);

        context.MarkLabel(lblEnd);

        return res;
    }
}
// shift := [1, 128]; eSize := {8, 16, 32, 64}.
//
// ulong UnsignedShrReg(ulong op, int shift, bool round, int eSize);
//
// Logical shift right by a runtime amount, optionally rounded to nearest.
private static Operand EmitUnsignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize)
{
    if (round)
    {
        Operand lblEnd = Label();

        Operand zeroUL = Const(0UL);
        Operand one = Const(1);
        Operand oneUL = Const(1UL);
        Operand eSizeMaxOp = Const(64);
        Operand oneShl63UL = Const(1UL << 63);

        // shift > 64: the rounded result is 0.
        Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL);

        context.BranchIf(lblEnd, shift, eSizeMaxOp, Comparison.Greater);

        // Round to nearest: add half of the last bit shifted out.
        Operand roundConst = context.ShiftLeft(oneUL, context.Subtract(shift, one));

        Operand add = context.Add(op, roundConst);

        Operand shr = context.ShiftRightUI(add, shift);

        // shift == 64 must shift everything out, but native shifts take the
        // amount mod 64 — select zero explicitly in that case.
        Operand isEqual = context.ICompareEqual(shift, eSizeMaxOp);

        context.Copy(res, context.ConditionalSelect(isEqual, zeroUL, shr));

        if (eSize == 64)
        {
            // 64-bit elements: op + roundConst may wrap (add < op unsigned);
            // when it does, or in the carry bit of the conceptual 65-bit sum
            // at its shifted position (which is 1 when shift == 64).
            context.BranchIf(lblEnd, add, op, Comparison.GreaterOrEqualUI);

            Operand right = context.BitwiseOr(shr, context.ShiftRightUI(oneShl63UL, context.Subtract(shift, one)));

            context.Copy(res, context.ConditionalSelect(isEqual, oneUL, right));
        }

        context.Branch(lblEnd);

        context.MarkLabel(lblEnd);

        return res;
    }
    else
    {
        Operand lblEnd = Label();

        Operand eSizeOp = Const(eSize);
        Operand zeroUL = Const(0UL);

        Operand shr = context.ShiftRightUI(op, shift);

        Operand res = context.Copy(context.AllocateLocal(OperandType.I64), shr);

        context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less);

        // shift >= eSize: everything is shifted out.
        context.Copy(res, zeroUL);

        context.Branch(lblEnd);

        context.MarkLabel(lblEnd);

        return res;
    }
}
  1401. }
  1402. }