// AInstEmitSimdShift.cs
  1. // https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
  2. using ChocolArm64.Decoder;
  3. using ChocolArm64.State;
  4. using ChocolArm64.Translation;
  5. using System;
  6. using System.Reflection.Emit;
  7. using System.Runtime.Intrinsics.X86;
  8. using static ChocolArm64.Instruction.AInstEmitSimdHelper;
  9. namespace ChocolArm64.Instruction
  10. {
  11. static partial class AInstEmit
  12. {
  13. public static void Rshrn_V(AILEmitterCtx Context)
  14. {
  15. EmitVectorShrImmNarrowOpZx(Context, Round: true);
  16. }
  17. public static void Shl_S(AILEmitterCtx Context)
  18. {
  19. AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
  20. EmitScalarUnaryOpZx(Context, () =>
  21. {
  22. Context.EmitLdc_I4(GetImmShl(Op));
  23. Context.Emit(OpCodes.Shl);
  24. });
  25. }
  // SHL (vector): shift every lane left by the immediate.
  // When SSE2 is enabled and the lane is wider than a byte, a single
  // Sse2.ShiftLeftLogical call is emitted; otherwise each lane is processed
  // by the generic per-element emitter.
  public static void Shl_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      bool UseSsePath = AOptimizations.UseSse2 && ShOp.Size > 0;

      if (!UseSsePath)
      {
          EmitVectorUnaryOpZx(Context, () =>
          {
              Context.EmitLdc_I4(GetImmShl(ShOp));
              Context.Emit(OpCodes.Shl);
          });

          return;
      }

      Type VecType = VectorUIntTypesPerSizeLog2[ShOp.Size];

      var Sll = typeof(Sse2).GetMethod(
          nameof(Sse2.ShiftLeftLogical),
          new Type[] { VecType, typeof(byte) });

      EmitLdvecWithUnsignedCast(Context, ShOp.Rn, ShOp.Size);
      Context.EmitLdc_I4(GetImmShl(ShOp));
      Context.EmitCall(Sll);
      EmitStvecWithUnsignedCast(Context, ShOp.Rd, ShOp.Size);

      // A 64-bit destination must have its upper half cleared.
      if (ShOp.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // SHLL (vector): widening shift left where the shift amount is the source
  // lane width in bits (8 << Size).
  public static void Shll_V(AILEmitterCtx Context)
  {
      AOpCodeSimd SimdOp = (AOpCodeSimd)Context.CurrOp;

      int LaneBits = 8 << SimdOp.Size;

      Action EmitShl = () => Context.Emit(OpCodes.Shl);

      EmitVectorShImmWidenBinaryZx(Context, EmitShl, LaneBits);
  }
  // SHRN (vector): forwards to the zero-extending narrowing shift-right
  // emitter with rounding disabled.
  public static void Shrn_V(AILEmitterCtx Context)
      => EmitVectorShrImmNarrowOpZx(Context, Round: false);
  // SLI (vector): for every lane, emits (Rn << Shift) | (Rd & Mask), where
  // Mask covers the low Shift bits, and inserts the result back into Rd.
  public static void Sli_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      int ByteCount = ShOp.GetBitsCount() >> 3;
      int LaneCount = ByteCount >> ShOp.Size;

      int ShlAmount = GetImmShl(ShOp);

      // Low ShlAmount bits set; a zero shift keeps nothing from Rd
      // (the guard also avoids shifting a ulong by 64).
      ulong KeepMask = 0;

      if (ShlAmount != 0)
      {
          KeepMask = ulong.MaxValue >> (64 - ShlAmount);
      }

      for (int Lane = 0; Lane < LaneCount; Lane++)
      {
          // Shifted source lane.
          EmitVectorExtractZx(Context, ShOp.Rn, Lane, ShOp.Size);
          Context.EmitLdc_I4(ShlAmount);
          Context.Emit(OpCodes.Shl);

          // Preserved low bits of the destination lane.
          EmitVectorExtractZx(Context, ShOp.Rd, Lane, ShOp.Size);
          Context.EmitLdc_I8((long)KeepMask);
          Context.Emit(OpCodes.And);

          Context.Emit(OpCodes.Or);

          EmitVectorInsert(Context, ShOp.Rd, Lane, ShOp.Size);
      }

      if (ShOp.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // SQRSHRN (scalar): rounding saturating narrow, signed source and signed destination.
  public static void Sqrshrn_S(AILEmitterCtx Context)
      => EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  // SQRSHRN (vector): rounding saturating narrow, signed source and signed destination.
  public static void Sqrshrn_V(AILEmitterCtx Context)
      => EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  // SQRSHRUN (scalar): rounding saturating narrow, signed source and unsigned destination.
  public static void Sqrshrun_S(AILEmitterCtx Context)
      => EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  // SQRSHRUN (vector): rounding saturating narrow, signed source and unsigned destination.
  public static void Sqrshrun_V(AILEmitterCtx Context)
      => EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  // SQSHRN (scalar): saturating narrow without rounding, signed source and signed destination.
  public static void Sqshrn_S(AILEmitterCtx Context)
      => EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  // SQSHRN (vector): saturating narrow without rounding, signed source and signed destination.
  public static void Sqshrn_V(AILEmitterCtx Context)
      => EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  // SQSHRUN (scalar): saturating narrow without rounding, signed source and unsigned destination.
  public static void Sqshrun_S(AILEmitterCtx Context)
      => EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  // SQSHRUN (vector): saturating narrow without rounding, signed source and unsigned destination.
  public static void Sqshrun_V(AILEmitterCtx Context)
      => EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  // SRSHR (scalar): signed shift right by immediate with rounding.
  public static void Srshr_S(AILEmitterCtx Context)
      => EmitScalarShrImmOpSx(Context, ShrImmFlags.Round);
  // SRSHR (vector): signed shift right by immediate with rounding.
  // The SSE2 path is only taken for 16- and 32-bit lanes (Size 1 or 2);
  // every other case goes through the per-element emitter.
  public static void Srshr_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      bool UseSsePath = AOptimizations.UseSse2 && ShOp.Size > 0 && ShOp.Size < 3;

      if (!UseSsePath)
      {
          EmitVectorShrImmOpSx(Context, ShrImmFlags.Round);

          return;
      }

      Type VecType = VectorIntTypesPerSizeLog2[ShOp.Size];

      Type[] ShiftSig = new Type[] { VecType, typeof(byte) };
      Type[] AddSig = new Type[] { VecType, VecType };

      int ShrAmount = GetImmShr(ShOp);
      int LaneBits = 8 << ShOp.Size;

      var Sll = typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), ShiftSig);
      var Srl = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), ShiftSig);
      var Sra = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), ShiftSig);
      var Add = typeof(Sse2).GetMethod(nameof(Sse2.Add), AddSig);

      EmitLdvecWithSignedCast(Context, ShOp.Rn, ShOp.Size);

      // Keep a copy of the source for the main shift below.
      Context.Emit(OpCodes.Dup);
      Context.EmitStvectmp();

      // (x << (LaneBits - Shift)) >>> (LaneBits - 1) leaves bit (Shift - 1)
      // of each lane in bit 0: this is the rounding increment.
      Context.EmitLdc_I4(LaneBits - ShrAmount);
      Context.EmitCall(Sll);

      Context.EmitLdc_I4(LaneBits - 1);
      Context.EmitCall(Srl);

      // x >> Shift (arithmetic), then add the rounding increment.
      Context.EmitLdvectmp();
      Context.EmitLdc_I4(ShrAmount);
      Context.EmitCall(Sra);

      Context.EmitCall(Add);

      EmitStvecWithSignedCast(Context, ShOp.Rd, ShOp.Size);

      if (ShOp.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // SRSRA (scalar): signed shift right by immediate with rounding, accumulated into Rd.
  public static void Srsra_S(AILEmitterCtx Context)
      => EmitScalarShrImmOpSx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  // SRSRA (vector): signed rounding shift right by immediate, accumulated
  // into Rd. The SSE2 path is only taken for 16- and 32-bit lanes; every
  // other case goes through the per-element emitter.
  public static void Srsra_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      bool UseSsePath = AOptimizations.UseSse2 && ShOp.Size > 0 && ShOp.Size < 3;

      if (!UseSsePath)
      {
          EmitVectorShrImmOpSx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate);

          return;
      }

      Type VecType = VectorIntTypesPerSizeLog2[ShOp.Size];

      Type[] ShiftSig = new Type[] { VecType, typeof(byte) };
      Type[] AddSig = new Type[] { VecType, VecType };

      int ShrAmount = GetImmShr(ShOp);
      int LaneBits = 8 << ShOp.Size;

      var Sll = typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), ShiftSig);
      var Srl = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), ShiftSig);
      var Sra = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), ShiftSig);
      var Add = typeof(Sse2).GetMethod(nameof(Sse2.Add), AddSig);

      // Accumulator operand goes on the stack first.
      EmitLdvecWithSignedCast(Context, ShOp.Rd, ShOp.Size);
      EmitLdvecWithSignedCast(Context, ShOp.Rn, ShOp.Size);

      Context.Emit(OpCodes.Dup);
      Context.EmitStvectmp();

      // Rounding increment: bit (Shift - 1) of each lane moved to bit 0.
      Context.EmitLdc_I4(LaneBits - ShrAmount);
      Context.EmitCall(Sll);

      Context.EmitLdc_I4(LaneBits - 1);
      Context.EmitCall(Srl);

      Context.EmitLdvectmp();
      Context.EmitLdc_I4(ShrAmount);
      Context.EmitCall(Sra);

      // increment + shifted value, then + accumulator.
      Context.EmitCall(Add);
      Context.EmitCall(Add);

      EmitStvecWithSignedCast(Context, ShOp.Rd, ShOp.Size);

      if (ShOp.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // SSHL (vector): register-controlled shift, signed variant.
  public static void Sshl_V(AILEmitterCtx Context)
      => EmitVectorShl(Context, Signed: true);
  // SSHLL (vector): sign-extending widening shift left by immediate.
  public static void Sshll_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      Action EmitShl = () => Context.Emit(OpCodes.Shl);

      EmitVectorShImmWidenBinarySx(Context, EmitShl, GetImmShl(ShOp));
  }
  // SSHR (scalar): signed shift right by immediate.
  public static void Sshr_S(AILEmitterCtx Context)
      => EmitShrImmOp(Context, ShrImmFlags.ScalarSx);
  // SSHR (vector): signed shift right by immediate. Uses
  // Sse2.ShiftRightArithmetic for 16- and 32-bit lanes when SSE2 is enabled,
  // otherwise the per-element emitter.
  public static void Sshr_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      bool UseSsePath = AOptimizations.UseSse2 && ShOp.Size > 0 && ShOp.Size < 3;

      if (!UseSsePath)
      {
          EmitShrImmOp(Context, ShrImmFlags.VectorSx);

          return;
      }

      Type VecType = VectorIntTypesPerSizeLog2[ShOp.Size];

      var Sra = typeof(Sse2).GetMethod(
          nameof(Sse2.ShiftRightArithmetic),
          new Type[] { VecType, typeof(byte) });

      EmitLdvecWithSignedCast(Context, ShOp.Rn, ShOp.Size);
      Context.EmitLdc_I4(GetImmShr(ShOp));
      Context.EmitCall(Sra);
      EmitStvecWithSignedCast(Context, ShOp.Rd, ShOp.Size);

      if (ShOp.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // SSRA (scalar): signed shift right by immediate, accumulated into Rd.
  public static void Ssra_S(AILEmitterCtx Context)
      => EmitScalarShrImmOpSx(Context, ShrImmFlags.Accumulate);
  // SSRA (vector): Rd += Rn >> imm (arithmetic). SSE2 path for 16- and
  // 32-bit lanes, per-element emitter otherwise.
  public static void Ssra_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      bool UseSsePath = AOptimizations.UseSse2 && ShOp.Size > 0 && ShOp.Size < 3;

      if (!UseSsePath)
      {
          EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate);

          return;
      }

      Type VecType = VectorIntTypesPerSizeLog2[ShOp.Size];

      var Sra = typeof(Sse2).GetMethod(
          nameof(Sse2.ShiftRightArithmetic),
          new Type[] { VecType, typeof(byte) });

      var Add = typeof(Sse2).GetMethod(
          nameof(Sse2.Add),
          new Type[] { VecType, VecType });

      // Stack order: accumulator, then the shifted source.
      EmitLdvecWithSignedCast(Context, ShOp.Rd, ShOp.Size);
      EmitLdvecWithSignedCast(Context, ShOp.Rn, ShOp.Size);

      Context.EmitLdc_I4(GetImmShr(ShOp));
      Context.EmitCall(Sra);

      Context.EmitCall(Add);

      EmitStvecWithSignedCast(Context, ShOp.Rd, ShOp.Size);

      if (ShOp.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // UQRSHRN (scalar): rounding saturating narrow, unsigned source and unsigned destination.
  public static void Uqrshrn_S(AILEmitterCtx Context)
      => EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  // UQRSHRN (vector): rounding saturating narrow, unsigned source and unsigned destination.
  public static void Uqrshrn_V(AILEmitterCtx Context)
      => EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  // UQSHRN (scalar): saturating narrow without rounding, unsigned source and unsigned destination.
  public static void Uqshrn_S(AILEmitterCtx Context)
      => EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  // UQSHRN (vector): saturating narrow without rounding, unsigned source and unsigned destination.
  public static void Uqshrn_V(AILEmitterCtx Context)
      => EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  // URSHR (scalar): unsigned shift right by immediate with rounding.
  public static void Urshr_S(AILEmitterCtx Context)
      => EmitScalarShrImmOpZx(Context, ShrImmFlags.Round);
  // URSHR (vector): unsigned shift right by immediate with rounding.
  // SSE2 path for every lane size except bytes; per-element emitter otherwise.
  public static void Urshr_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      bool UseSsePath = AOptimizations.UseSse2 && ShOp.Size > 0;

      if (!UseSsePath)
      {
          EmitVectorShrImmOpZx(Context, ShrImmFlags.Round);

          return;
      }

      Type VecType = VectorUIntTypesPerSizeLog2[ShOp.Size];

      Type[] ShiftSig = new Type[] { VecType, typeof(byte) };
      Type[] AddSig = new Type[] { VecType, VecType };

      int ShrAmount = GetImmShr(ShOp);
      int LaneBits = 8 << ShOp.Size;

      var Sll = typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), ShiftSig);
      var Srl = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), ShiftSig);
      var Add = typeof(Sse2).GetMethod(nameof(Sse2.Add), AddSig);

      EmitLdvecWithUnsignedCast(Context, ShOp.Rn, ShOp.Size);

      // Keep a copy of the source for the main shift below.
      Context.Emit(OpCodes.Dup);
      Context.EmitStvectmp();

      // Rounding increment: bit (Shift - 1) of each lane moved to bit 0.
      Context.EmitLdc_I4(LaneBits - ShrAmount);
      Context.EmitCall(Sll);

      Context.EmitLdc_I4(LaneBits - 1);
      Context.EmitCall(Srl);

      // x >>> Shift, then add the rounding increment.
      Context.EmitLdvectmp();
      Context.EmitLdc_I4(ShrAmount);
      Context.EmitCall(Srl);

      Context.EmitCall(Add);

      EmitStvecWithUnsignedCast(Context, ShOp.Rd, ShOp.Size);

      if (ShOp.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // URSRA (scalar): unsigned shift right by immediate with rounding, accumulated into Rd.
  public static void Ursra_S(AILEmitterCtx Context)
      => EmitScalarShrImmOpZx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  // URSRA (vector): unsigned rounding shift right by immediate, accumulated
  // into Rd. SSE2 path for every lane size except bytes; per-element emitter
  // otherwise.
  public static void Ursra_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      bool UseSsePath = AOptimizations.UseSse2 && ShOp.Size > 0;

      if (!UseSsePath)
      {
          EmitVectorShrImmOpZx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate);

          return;
      }

      Type VecType = VectorUIntTypesPerSizeLog2[ShOp.Size];

      Type[] ShiftSig = new Type[] { VecType, typeof(byte) };
      Type[] AddSig = new Type[] { VecType, VecType };

      int ShrAmount = GetImmShr(ShOp);
      int LaneBits = 8 << ShOp.Size;

      var Sll = typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), ShiftSig);
      var Srl = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), ShiftSig);
      var Add = typeof(Sse2).GetMethod(nameof(Sse2.Add), AddSig);

      // Accumulator operand goes on the stack first.
      EmitLdvecWithUnsignedCast(Context, ShOp.Rd, ShOp.Size);
      EmitLdvecWithUnsignedCast(Context, ShOp.Rn, ShOp.Size);

      Context.Emit(OpCodes.Dup);
      Context.EmitStvectmp();

      // Rounding increment: bit (Shift - 1) of each lane moved to bit 0.
      Context.EmitLdc_I4(LaneBits - ShrAmount);
      Context.EmitCall(Sll);

      Context.EmitLdc_I4(LaneBits - 1);
      Context.EmitCall(Srl);

      Context.EmitLdvectmp();
      Context.EmitLdc_I4(ShrAmount);
      Context.EmitCall(Srl);

      // increment + shifted value, then + accumulator.
      Context.EmitCall(Add);
      Context.EmitCall(Add);

      EmitStvecWithUnsignedCast(Context, ShOp.Rd, ShOp.Size);

      if (ShOp.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // USHL (vector): register-controlled shift, unsigned variant.
  public static void Ushl_V(AILEmitterCtx Context)
      => EmitVectorShl(Context, Signed: false);
  // USHLL (vector): zero-extending widening shift left by immediate.
  public static void Ushll_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      Action EmitShl = () => Context.Emit(OpCodes.Shl);

      EmitVectorShImmWidenBinaryZx(Context, EmitShl, GetImmShl(ShOp));
  }
  // USHR (scalar): unsigned shift right by immediate.
  public static void Ushr_S(AILEmitterCtx Context)
      => EmitShrImmOp(Context, ShrImmFlags.ScalarZx);
  // USHR (vector): unsigned shift right by immediate. Uses
  // Sse2.ShiftRightLogical for every lane size except bytes when SSE2 is
  // enabled, otherwise the per-element emitter.
  public static void Ushr_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      bool UseSsePath = AOptimizations.UseSse2 && ShOp.Size > 0;

      if (!UseSsePath)
      {
          EmitShrImmOp(Context, ShrImmFlags.VectorZx);

          return;
      }

      Type VecType = VectorUIntTypesPerSizeLog2[ShOp.Size];

      var Srl = typeof(Sse2).GetMethod(
          nameof(Sse2.ShiftRightLogical),
          new Type[] { VecType, typeof(byte) });

      EmitLdvecWithUnsignedCast(Context, ShOp.Rn, ShOp.Size);
      Context.EmitLdc_I4(GetImmShr(ShOp));
      Context.EmitCall(Srl);
      EmitStvecWithUnsignedCast(Context, ShOp.Rd, ShOp.Size);

      if (ShOp.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // USRA (scalar): unsigned shift right by immediate, accumulated into Rd.
  public static void Usra_S(AILEmitterCtx Context)
      => EmitScalarShrImmOpZx(Context, ShrImmFlags.Accumulate);
  // USRA (vector): Rd += Rn >>> imm. SSE2 path for every lane size except
  // bytes, per-element emitter otherwise.
  public static void Usra_V(AILEmitterCtx Context)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      bool UseSsePath = AOptimizations.UseSse2 && ShOp.Size > 0;

      if (!UseSsePath)
      {
          EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate);

          return;
      }

      Type VecType = VectorUIntTypesPerSizeLog2[ShOp.Size];

      var Srl = typeof(Sse2).GetMethod(
          nameof(Sse2.ShiftRightLogical),
          new Type[] { VecType, typeof(byte) });

      var Add = typeof(Sse2).GetMethod(
          nameof(Sse2.Add),
          new Type[] { VecType, VecType });

      // Stack order: accumulator, then the shifted source.
      EmitLdvecWithUnsignedCast(Context, ShOp.Rd, ShOp.Size);
      EmitLdvecWithUnsignedCast(Context, ShOp.Rn, ShOp.Size);

      Context.EmitLdc_I4(GetImmShr(ShOp));
      Context.EmitCall(Srl);

      Context.EmitCall(Add);

      EmitStvecWithUnsignedCast(Context, ShOp.Rd, ShOp.Size);

      if (ShOp.RegisterSize == ARegisterSize.SIMD64)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // Emits a per-lane register-controlled shift (SSHL / USHL).
  //
  // Each lane of vector A is shifted by the signed low 8 bits of the matching
  // lane of vector B:
  //  - shift >= 0: left shift; the result is zero when shift >= lane width.
  //  - shift <  0: right shift by -shift.
  //      Unsigned: the result is zero when -shift >= lane width.
  //      Signed:   the result is the sign fill (0 or -1) when -shift >= lane
  //                width, so the count is clamped to (lane width - 1) rather
  //                than being routed to the zero path.
  //
  // Context: emitter for the current opcode, which must be an AOpCodeSimd.
  // Signed:  selects the sign-extending binary emitter and arithmetic right shift.
  private static void EmitVectorShl(AILEmitterCtx Context, bool Signed)
  {
      AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

      int MaxShift = 8 << Op.Size;

      Action Emit = () =>
      {
          AILLabel LblShl = new AILLabel();
          AILLabel LblZero = new AILLabel();
          AILLabel LblEnd = new AILLabel();

          // Stack on entry: [value, count]. Jumps to LblZero (leaving both on
          // the stack) when count >= MaxShift, otherwise applies ILOp.
          void EmitShift(OpCode ILOp)
          {
              Context.Emit(OpCodes.Dup);
              Context.EmitLdc_I4(MaxShift);
              Context.Emit(OpCodes.Bge_S, LblZero);
              Context.Emit(ILOp);
              Context.Emit(OpCodes.Br_S, LblEnd);
          }

          // Only the signed low byte of the shift operand is significant.
          Context.Emit(OpCodes.Conv_I1);
          Context.Emit(OpCodes.Dup);
          Context.EmitLdc_I4(0);
          Context.Emit(OpCodes.Bge_S, LblShl);

          // Negative count: shift right by its magnitude.
          Context.Emit(OpCodes.Neg);

          if (Signed)
          {
              // An arithmetic shift by (MaxShift - 1) already yields the sign
              // fill, so clamping gives the right answer for any larger count.
              Context.EmitLdc_I4(MaxShift - 1);
              Context.EmitCall(typeof(Math).GetMethod(nameof(Math.Min), new Type[] { typeof(int), typeof(int) }));
              Context.Emit(OpCodes.Shr);
              Context.Emit(OpCodes.Br_S, LblEnd);
          }
          else
          {
              EmitShift(OpCodes.Shr_Un);
          }

          Context.MarkLabel(LblShl);

          EmitShift(OpCodes.Shl);

          // Out-of-range count: drop value and count, result is zero.
          Context.MarkLabel(LblZero);

          Context.Emit(OpCodes.Pop);
          Context.Emit(OpCodes.Pop);
          Context.EmitLdc_I8(0);

          Context.MarkLabel(LblEnd);
      };

      if (Signed)
      {
          EmitVectorBinaryOpSx(Context, Emit);
      }
      else
      {
          EmitVectorBinaryOpZx(Context, Emit);
      }
  }
  // Option bits consumed by EmitShrImmOp, plus the four scalar/vector and
  // signed/unsigned combinations used as base values by the wrappers.
  [Flags]
  private enum ShrImmFlags
  {
      // Individual option bits.
      Scalar     = 0x1,
      Signed     = 0x2,
      Round      = 0x4,
      Accumulate = 0x8,

      // Base combinations.
      ScalarSx = Scalar | Signed,
      ScalarZx = Scalar,
      VectorSx = Signed,
      VectorZx = 0
  }
  // Scalar, signed shift right by immediate with the extra Flags applied.
  private static void EmitScalarShrImmOpSx(AILEmitterCtx Context, ShrImmFlags Flags)
      => EmitShrImmOp(Context, ShrImmFlags.ScalarSx | Flags);
  // Scalar, unsigned shift right by immediate with the extra Flags applied.
  private static void EmitScalarShrImmOpZx(AILEmitterCtx Context, ShrImmFlags Flags)
      => EmitShrImmOp(Context, ShrImmFlags.ScalarZx | Flags);
  // Vector, signed shift right by immediate with the extra Flags applied.
  private static void EmitVectorShrImmOpSx(AILEmitterCtx Context, ShrImmFlags Flags)
      => EmitShrImmOp(Context, ShrImmFlags.VectorSx | Flags);
  // Vector, unsigned shift right by immediate with the extra Flags applied.
  private static void EmitVectorShrImmOpZx(AILEmitterCtx Context, ShrImmFlags Flags)
      => EmitShrImmOp(Context, ShrImmFlags.VectorZx | Flags);
  // Generic per-element shift right by immediate.
  //
  // For each lane of Rn (a single lane when the Scalar flag is set) this emits
  //   (lane + RoundConst?) >> Shift  [+ matching lane of Rd when accumulating]
  // into the temporary vector, which is then stored to Rd. Lanes up to 32
  // bits are computed inline; 64-bit lanes go through EmitShrImm_64.
  private static void EmitShrImmOp(AILEmitterCtx Context, ShrImmFlags Flags)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      bool IsScalar     = Flags.HasFlag(ShrImmFlags.Scalar);
      bool IsSigned     = Flags.HasFlag(ShrImmFlags.Signed);
      bool DoRound      = Flags.HasFlag(ShrImmFlags.Round);
      bool DoAccumulate = Flags.HasFlag(ShrImmFlags.Accumulate);

      int ShrAmount = GetImmShr(ShOp);

      long Rounding = 1L << (ShrAmount - 1);

      int ByteCount = ShOp.GetBitsCount() >> 3;
      int LaneCount = IsScalar ? 1 : ByteCount >> ShOp.Size;

      OpCode InlineShr = IsSigned ? OpCodes.Shr : OpCodes.Shr_Un;

      for (int Lane = 0; Lane < LaneCount; Lane++)
      {
          EmitVectorExtract(Context, ShOp.Rn, Lane, ShOp.Size, IsSigned);

          if (ShOp.Size > 2)
          {
              // 64-bit lanes: rounding and shift are done by the helper call.
              EmitShrImm_64(Context, IsSigned, DoRound ? Rounding : 0L, ShrAmount);
          }
          else
          {
              if (DoRound)
              {
                  Context.EmitLdc_I8(Rounding);
                  Context.Emit(OpCodes.Add);
              }

              Context.EmitLdc_I4(ShrAmount);
              Context.Emit(InlineShr);
          }

          if (DoAccumulate)
          {
              EmitVectorExtract(Context, ShOp.Rd, Lane, ShOp.Size, IsSigned);
              Context.Emit(OpCodes.Add);
          }

          EmitVectorInsertTmp(Context, Lane, ShOp.Size);
      }

      Context.EmitLdvectmp();
      Context.EmitStvec(ShOp.Rd);

      if (ShOp.RegisterSize == ARegisterSize.SIMD64 || IsScalar)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // Narrowing shift right by immediate (SHRN / RSHRN).
  //
  // Reads lanes of width (Size + 1) from Rn, optionally adds the rounding
  // constant, shifts right logically and writes lanes of width Size into the
  // temporary vector. For 128-bit operations the results land in the upper
  // half of a copy of Rd; otherwise they land in the lower half and the
  // upper half of Rd is cleared.
  private static void EmitVectorShrImmNarrowOpZx(AILEmitterCtx Context, bool Round)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      int ShrAmount = GetImmShr(ShOp);

      long Rounding = 1L << (ShrAmount - 1);

      int LaneCount = 8 >> ShOp.Size;

      int DstBase = 0;

      if (ShOp.RegisterSize == ARegisterSize.SIMD128)
      {
          DstBase = LaneCount;
      }

      bool KeepLowerHalf = DstBase != 0;

      if (KeepLowerHalf)
      {
          // Start from the current Rd so its lower half is preserved.
          Context.EmitLdvec(ShOp.Rd);
          Context.EmitStvectmp();
      }

      for (int Lane = 0; Lane < LaneCount; Lane++)
      {
          EmitVectorExtractZx(Context, ShOp.Rn, Lane, ShOp.Size + 1);

          if (Round)
          {
              Context.EmitLdc_I8(Rounding);
              Context.Emit(OpCodes.Add);
          }

          Context.EmitLdc_I4(ShrAmount);
          Context.Emit(OpCodes.Shr_Un);

          EmitVectorInsertTmp(Context, DstBase + Lane, ShOp.Size);
      }

      Context.EmitLdvectmp();
      Context.EmitStvec(ShOp.Rd);

      if (!KeepLowerHalf)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // Option bits consumed by EmitShrImmSaturatingNarrowOp, plus the
  // scalar/vector and source/destination signedness combinations used by the
  // instruction entry points.
  [Flags]
  private enum ShrImmSaturatingNarrowFlags
  {
      // Individual option bits.
      Scalar    = 0x1,
      SignedSrc = 0x2,
      SignedDst = 0x4,
      Round     = 0x8,

      // Base combinations (source signedness, then destination signedness).
      ScalarSxSx = Scalar | SignedSrc | SignedDst,
      ScalarSxZx = Scalar | SignedSrc,
      ScalarZxZx = Scalar,
      VectorSxSx = SignedSrc | SignedDst,
      VectorSxZx = SignedSrc,
      VectorZxZx = 0
  }
  // Same as EmitShrImmSaturatingNarrowOp, with the Round flag forced on.
  private static void EmitRoundShrImmSaturatingNarrowOp(AILEmitterCtx Context, ShrImmSaturatingNarrowFlags Flags)
      => EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.Round | Flags);
  // Saturating narrowing shift right by immediate.
  //
  // Reads lanes of width (Size + 1) from Rn, shifts them right (with optional
  // rounding), saturates through EmitSatQ and writes lanes of width Size into
  // the temporary vector, which is then stored to Rd. Vector 128-bit
  // operations fill the upper half of a copy of Rd; all other forms fill the
  // lower half and clear the upper half of Rd.
  private static void EmitShrImmSaturatingNarrowOp(AILEmitterCtx Context, ShrImmSaturatingNarrowFlags Flags)
  {
      AOpCodeSimdShImm ShOp = (AOpCodeSimdShImm)Context.CurrOp;

      bool IsScalar = Flags.HasFlag(ShrImmSaturatingNarrowFlags.Scalar);
      bool SrcSigned = Flags.HasFlag(ShrImmSaturatingNarrowFlags.SignedSrc);
      bool DstSigned = Flags.HasFlag(ShrImmSaturatingNarrowFlags.SignedDst);
      bool DoRound = Flags.HasFlag(ShrImmSaturatingNarrowFlags.Round);

      int ShrAmount = GetImmShr(ShOp);

      long Rounding = 1L << (ShrAmount - 1);

      int LaneCount = IsScalar ? 1 : 8 >> ShOp.Size;

      int DstBase = 0;

      if (!IsScalar && ShOp.RegisterSize == ARegisterSize.SIMD128)
      {
          DstBase = LaneCount;
      }

      if (IsScalar)
      {
          EmitVectorZeroLowerTmp(Context);
      }

      if (DstBase != 0)
      {
          // Start from the current Rd so its lower half is preserved.
          Context.EmitLdvec(ShOp.Rd);
          Context.EmitStvectmp();
      }

      // Rounding a 64-bit source lane is delegated to the helper call;
      // everything else is computed inline.
      bool UseHelper64 = DoRound && ShOp.Size > 1;

      OpCode InlineShr = SrcSigned ? OpCodes.Shr : OpCodes.Shr_Un;

      for (int Lane = 0; Lane < LaneCount; Lane++)
      {
          EmitVectorExtract(Context, ShOp.Rn, Lane, ShOp.Size + 1, SrcSigned);

          if (UseHelper64)
          {
              EmitShrImm_64(Context, SrcSigned, Rounding, ShrAmount); // Shift <= 32
          }
          else
          {
              if (DoRound)
              {
                  Context.EmitLdc_I8(Rounding);
                  Context.Emit(OpCodes.Add);
              }

              Context.EmitLdc_I4(ShrAmount);
              Context.Emit(InlineShr);
          }

          EmitSatQ(Context, ShOp.Size, SrcSigned, DstSigned);

          EmitVectorInsertTmp(Context, DstBase + Lane, ShOp.Size);
      }

      Context.EmitLdvectmp();
      Context.EmitStvec(ShOp.Rd);

      if (DstBase == 0)
      {
          EmitVectorZeroUpper(Context, ShOp.Rd);
      }
  }
  // Dst_64 = (Int(Src_64, Signed) + RoundConst) >> Shift;
  // Pushes RoundConst and Shift, then calls the signed or unsigned
  // ASoftFallback helper. Callers in this file push the source value first.
  private static void EmitShrImm_64(
      AILEmitterCtx Context,
      bool Signed,
      long RoundConst,
      int Shift)
  {
      string FallbackName = Signed
          ? nameof(ASoftFallback.SignedShrImm_64)
          : nameof(ASoftFallback.UnsignedShrImm_64);

      Context.EmitLdc_I8(RoundConst);
      Context.EmitLdc_I4(Shift);

      ASoftFallback.EmitCall(Context, FallbackName);
  }
  // Widening shift by immediate with sign-extended source lanes.
  private static void EmitVectorShImmWidenBinarySx(AILEmitterCtx Context, Action Emit, int Imm)
      => EmitVectorShImmWidenBinaryOp(Context, Emit, Imm, Signed: true);
  // Widening shift by immediate with zero-extended source lanes.
  private static void EmitVectorShImmWidenBinaryZx(AILEmitterCtx Context, Action Emit, int Imm)
      => EmitVectorShImmWidenBinaryOp(Context, Emit, Imm, Signed: false);
  // Widening binary operation against an immediate.
  //
  // For each of the (8 >> Size) source lanes this extracts the lane from Rn
  // (from the upper half for 128-bit operations), pushes Imm, runs the Emit
  // callback and inserts the result as a lane of width (Size + 1) into the
  // temporary vector, which is finally stored to Rd.
  private static void EmitVectorShImmWidenBinaryOp(AILEmitterCtx Context, Action Emit, int Imm, bool Signed)
  {
      AOpCodeSimd SimdOp = (AOpCodeSimd)Context.CurrOp;

      int LaneCount = 8 >> SimdOp.Size;

      int SrcBase = 0;

      if (SimdOp.RegisterSize == ARegisterSize.SIMD128)
      {
          SrcBase = LaneCount;
      }

      for (int Lane = 0; Lane < LaneCount; Lane++)
      {
          EmitVectorExtract(Context, SimdOp.Rn, SrcBase + Lane, SimdOp.Size, Signed);

          Context.EmitLdc_I4(Imm);

          Emit();

          EmitVectorInsertTmp(Context, Lane, SimdOp.Size + 1);
      }

      Context.EmitLdvectmp();
      Context.EmitStvec(SimdOp.Rd);
  }
  647. }
  648. }