// InstEmitSimdShift.cs
// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h

using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;

using static ChocolArm64.Instructions.InstEmitSimdHelper;

namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
  13. public static void Rshrn_V(ILEmitterCtx context)
  14. {
  15. EmitVectorShrImmNarrowOpZx(context, round: true);
  16. }
  17. public static void Shl_S(ILEmitterCtx context)
  18. {
  19. OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
  20. EmitScalarUnaryOpZx(context, () =>
  21. {
  22. context.EmitLdc_I4(GetImmShl(op));
  23. context.Emit(OpCodes.Shl);
  24. });
  25. }
        // SHL (vector): shift each element left by the encoded immediate.
        public static void Shl_V(ILEmitterCtx context)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            // SSE2 fast path; excluded for byte elements (op.Size == 0), which have
            // no packed shift instruction.
            if (Optimizations.UseSse2 && op.Size > 0)
            {
                Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };

                EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);

                context.EmitLdc_I4(GetImmShl(op));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));

                EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    // 64-bit vector form writes zeros to the upper half of the destination.
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitVectorUnaryOpZx(context, () =>
                {
                    context.EmitLdc_I4(GetImmShl(op));
                    context.Emit(OpCodes.Shl);
                });
            }
        }
  50. public static void Shll_V(ILEmitterCtx context)
  51. {
  52. OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
  53. int shift = 8 << op.Size;
  54. EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift);
  55. }
  56. public static void Shrn_V(ILEmitterCtx context)
  57. {
  58. EmitVectorShrImmNarrowOpZx(context, round: false);
  59. }
        // SLI (vector): shift left and insert. Each destination element keeps its low
        // 'shift' bits and takes the shifted source element in the remaining bits.
        public static void Sli_V(ILEmitterCtx context)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            int bytes = op.GetBitsCount() >> 3;
            int elems = bytes >> op.Size;

            int shift = GetImmShl(op);

            // Mask of the destination bits preserved by the insert (bits below 'shift').
            // The shift == 0 case is special-cased because x >> 64 is undefined in C#.
            ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0;

            for (int index = 0; index < elems; index++)
            {
                EmitVectorExtractZx(context, op.Rn, index, op.Size);

                context.EmitLdc_I4(shift);

                context.Emit(OpCodes.Shl);

                EmitVectorExtractZx(context, op.Rd, index, op.Size);

                context.EmitLdc_I8((long)mask);

                // result = (src << shift) | (dst & mask)
                context.Emit(OpCodes.And);
                context.Emit(OpCodes.Or);

                EmitVectorInsert(context, op.Rd, index, op.Size);
            }

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
  83. public static void Sqrshrn_S(ILEmitterCtx context)
  84. {
  85. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  86. }
  87. public static void Sqrshrn_V(ILEmitterCtx context)
  88. {
  89. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  90. }
  91. public static void Sqrshrun_S(ILEmitterCtx context)
  92. {
  93. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  94. }
  95. public static void Sqrshrun_V(ILEmitterCtx context)
  96. {
  97. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  98. }
  99. public static void Sqshrn_S(ILEmitterCtx context)
  100. {
  101. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  102. }
  103. public static void Sqshrn_V(ILEmitterCtx context)
  104. {
  105. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  106. }
  107. public static void Sqshrun_S(ILEmitterCtx context)
  108. {
  109. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  110. }
  111. public static void Sqshrun_V(ILEmitterCtx context)
  112. {
  113. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  114. }
  115. public static void Srshr_S(ILEmitterCtx context)
  116. {
  117. EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
  118. }
        // SRSHR (vector): signed rounding shift right by immediate.
        public static void Srshr_V(ILEmitterCtx context)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            // SSE2 path only for 16/32-bit elements: packed arithmetic shift has no
            // byte or 64-bit form.
            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
            {
                Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
                Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };

                int shift = GetImmShr(op);
                int eSize = 8 << op.Size;

                EmitLdvecWithSignedCast(context, op.Rn, op.Size);

                context.Emit(OpCodes.Dup);
                context.EmitStvectmp();

                // Isolate the rounding bit (bit shift-1 of each source element):
                // (x << (eSize - shift)) >>> (eSize - 1).
                context.EmitLdc_I4(eSize - shift);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));

                context.EmitLdc_I4(eSize - 1);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

                context.EmitLdvectmp();

                context.EmitLdc_I4(shift);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));

                // result = (x >> shift) + roundingBit, equivalent to (x + 2^(shift-1)) >> shift
                // without the risk of the rounding add overflowing the element.
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

                EmitStvecWithSignedCast(context, op.Rd, op.Size);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
            }
        }
  151. public static void Srsra_S(ILEmitterCtx context)
  152. {
  153. EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  154. }
        // SRSRA (vector): signed rounding shift right and accumulate into Rd.
        public static void Srsra_V(ILEmitterCtx context)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            // SSE2 path only for 16/32-bit elements (see Srshr_V).
            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
            {
                Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
                Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };

                int shift = GetImmShr(op);
                int eSize = 8 << op.Size;

                // Accumulator (Rd) is loaded first; it is consumed by the second Add below.
                EmitLdvecWithSignedCast(context, op.Rd, op.Size);

                EmitLdvecWithSignedCast(context, op.Rn, op.Size);

                context.Emit(OpCodes.Dup);
                context.EmitStvectmp();

                // Rounding bit: (x << (eSize - shift)) >>> (eSize - 1).
                context.EmitLdc_I4(eSize - shift);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));

                context.EmitLdc_I4(eSize - 1);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

                context.EmitLdvectmp();

                context.EmitLdc_I4(shift);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));

                // First Add: rounded shift result; second Add: accumulate into Rd.
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

                EmitStvecWithSignedCast(context, op.Rd, op.Size);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
            }
        }
  189. public static void Sshl_V(ILEmitterCtx context)
  190. {
  191. EmitVectorShl(context, signed: true);
  192. }
  193. public static void Sshll_V(ILEmitterCtx context)
  194. {
  195. OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
  196. EmitVectorShImmWidenBinarySx(context, () => context.Emit(OpCodes.Shl), GetImmShl(op));
  197. }
  198. public static void Sshr_S(ILEmitterCtx context)
  199. {
  200. EmitShrImmOp(context, ShrImmFlags.ScalarSx);
  201. }
        // SSHR (vector): signed shift right by immediate.
        public static void Sshr_V(ILEmitterCtx context)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            // SSE2 path only for 16/32-bit elements: packed arithmetic shift has no
            // byte or 64-bit form.
            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
            {
                Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };

                EmitLdvecWithSignedCast(context, op.Rn, op.Size);

                context.EmitLdc_I4(GetImmShr(op));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));

                EmitStvecWithSignedCast(context, op.Rd, op.Size);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitShrImmOp(context, ShrImmFlags.VectorSx);
            }
        }
  223. public static void Ssra_S(ILEmitterCtx context)
  224. {
  225. EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
  226. }
        // SSRA (vector): signed shift right by immediate and accumulate into Rd.
        public static void Ssra_V(ILEmitterCtx context)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            // SSE2 path only for 16/32-bit elements (see Sshr_V).
            if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
            {
                Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
                Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };

                // Accumulator (Rd) first, then the shifted Rn; Add combines them.
                EmitLdvecWithSignedCast(context, op.Rd, op.Size);
                EmitLdvecWithSignedCast(context, op.Rn, op.Size);

                context.EmitLdc_I4(GetImmShr(op));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

                EmitStvecWithSignedCast(context, op.Rd, op.Size);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
            }
        }
  251. public static void Uqrshrn_S(ILEmitterCtx context)
  252. {
  253. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  254. }
  255. public static void Uqrshrn_V(ILEmitterCtx context)
  256. {
  257. EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  258. }
  259. public static void Uqshrn_S(ILEmitterCtx context)
  260. {
  261. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  262. }
  263. public static void Uqshrn_V(ILEmitterCtx context)
  264. {
  265. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  266. }
  267. public static void Urshr_S(ILEmitterCtx context)
  268. {
  269. EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
  270. }
        // URSHR (vector): unsigned rounding shift right by immediate.
        public static void Urshr_V(ILEmitterCtx context)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            // SSE2 path for all element sizes except bytes (no packed byte shifts);
            // logical shifts do have a 64-bit form, unlike the arithmetic ones.
            if (Optimizations.UseSse2 && op.Size > 0)
            {
                Type[] typesShs = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
                Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };

                int shift = GetImmShr(op);
                int eSize = 8 << op.Size;

                EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);

                context.Emit(OpCodes.Dup);
                context.EmitStvectmp();

                // Rounding bit: (x << (eSize - shift)) >>> (eSize - 1).
                context.EmitLdc_I4(eSize - shift);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));

                context.EmitLdc_I4(eSize - 1);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

                context.EmitLdvectmp();

                context.EmitLdc_I4(shift);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

                // result = (x >>> shift) + roundingBit.
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

                EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
            }
        }
  302. public static void Ursra_S(ILEmitterCtx context)
  303. {
  304. EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  305. }
        // URSRA (vector): unsigned rounding shift right and accumulate into Rd.
        public static void Ursra_V(ILEmitterCtx context)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            // SSE2 path for all element sizes except bytes (see Urshr_V).
            if (Optimizations.UseSse2 && op.Size > 0)
            {
                Type[] typesShs = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
                Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };

                int shift = GetImmShr(op);
                int eSize = 8 << op.Size;

                // Accumulator (Rd) is loaded first; consumed by the second Add below.
                EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);

                EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);

                context.Emit(OpCodes.Dup);
                context.EmitStvectmp();

                // Rounding bit: (x << (eSize - shift)) >>> (eSize - 1).
                context.EmitLdc_I4(eSize - shift);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));

                context.EmitLdc_I4(eSize - 1);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

                context.EmitLdvectmp();

                context.EmitLdc_I4(shift);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));

                // First Add: rounded shift result; second Add: accumulate into Rd.
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

                EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
            }
        }
  339. public static void Ushl_V(ILEmitterCtx context)
  340. {
  341. EmitVectorShl(context, signed: false);
  342. }
  343. public static void Ushll_V(ILEmitterCtx context)
  344. {
  345. OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
  346. EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), GetImmShl(op));
  347. }
  348. public static void Ushr_S(ILEmitterCtx context)
  349. {
  350. EmitShrImmOp(context, ShrImmFlags.ScalarZx);
  351. }
  352. public static void Ushr_V(ILEmitterCtx context)
  353. {
  354. OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
  355. if (Optimizations.UseSse2 && op.Size > 0)
  356. {
  357. Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
  358. EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
  359. context.EmitLdc_I4(GetImmShr(op));
  360. context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
  361. EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
  362. if (op.RegisterSize == RegisterSize.Simd64)
  363. {
  364. EmitVectorZeroUpper(context, op.Rd);
  365. }
  366. }
  367. else
  368. {
  369. EmitShrImmOp(context, ShrImmFlags.VectorZx);
  370. }
  371. }
  372. public static void Usra_S(ILEmitterCtx context)
  373. {
  374. EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
  375. }
        // USRA (vector): unsigned shift right by immediate and accumulate into Rd.
        public static void Usra_V(ILEmitterCtx context)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            // SSE2 path for all element sizes except bytes (no packed byte shifts).
            if (Optimizations.UseSse2 && op.Size > 0)
            {
                Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
                Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };

                // Accumulator (Rd) first, then the shifted Rn; Add combines them.
                EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
                EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);

                context.EmitLdc_I4(GetImmShr(op));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

                EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
            }
        }
        // Common implementation of SSHL / USHL (register-shift form).
        //
        // This instruction shifts the value on vector A by the number of bits
        // specified on the signed, lower 8 bits of vector B. If the shift value
        // is greater or equal to the data size of each lane, then the result is zero.
        // Additionally, negative shifts produce right shifts by the negated shift value.
        private static void EmitVectorShl(ILEmitterCtx context, bool signed)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            int maxShift = 8 << op.Size;

            Action emit = () =>
            {
                ILLabel lblShl  = new ILLabel();
                ILLabel lblZero = new ILLabel();
                ILLabel lblEnd  = new ILLabel();

                // On entry the stack holds [value, shiftAmount]. If the (absolute)
                // shift amount reaches the lane size, branch to lblZero with both
                // operands still on the stack (they are popped there); otherwise
                // the shift opcode consumes them.
                void EmitShift(OpCode ilOp)
                {
                    context.Emit(OpCodes.Dup);

                    context.EmitLdc_I4(maxShift);

                    context.Emit(OpCodes.Bge_S, lblZero);

                    context.Emit(ilOp);
                    context.Emit(OpCodes.Br_S, lblEnd);
                }

                // Only the signed low 8 bits of the shift operand are significant.
                context.Emit(OpCodes.Conv_I1);
                context.Emit(OpCodes.Dup);

                context.EmitLdc_I4(0);

                context.Emit(OpCodes.Bge_S, lblShl);

                // Negative shift amount: negate and shift right instead.
                context.Emit(OpCodes.Neg);

                EmitShift(signed
                    ? OpCodes.Shr
                    : OpCodes.Shr_Un);

                context.MarkLabel(lblShl);

                EmitShift(OpCodes.Shl);

                // Shift amount >= lane size: discard both operands, result is zero.
                context.MarkLabel(lblZero);

                context.Emit(OpCodes.Pop);
                context.Emit(OpCodes.Pop);

                context.EmitLdc_I8(0);

                context.MarkLabel(lblEnd);
            };

            if (signed)
            {
                EmitVectorBinaryOpSx(context, emit);
            }
            else
            {
                EmitVectorBinaryOpZx(context, emit);
            }
        }
        // Option bits for EmitShrImmOp.
        [Flags]
        private enum ShrImmFlags
        {
            Scalar     = 1 << 0, // Operate on a single element instead of the whole vector.
            Signed     = 1 << 1, // Sign-extend elements and use arithmetic shifts.
            Round      = 1 << 2, // Add the rounding constant before shifting.
            Accumulate = 1 << 3, // Add the shifted result to the destination element.

            ScalarSx = Scalar | Signed,
            ScalarZx = Scalar,

            VectorSx = Signed,
            VectorZx = 0
        }
  457. private static void EmitScalarShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags)
  458. {
  459. EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
  460. }
  461. private static void EmitScalarShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags)
  462. {
  463. EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
  464. }
  465. private static void EmitVectorShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags)
  466. {
  467. EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
  468. }
  469. private static void EmitVectorShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags)
  470. {
  471. EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
  472. }
        // Shared IL fallback for the immediate shift-right family
        // (SSHR, USHR, SRSHR, URSHR, SSRA, USRA, SRSRA, URSRA).
        private static void EmitShrImmOp(ILEmitterCtx context, ShrImmFlags flags)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            bool scalar     = (flags & ShrImmFlags.Scalar)     != 0;
            bool signed     = (flags & ShrImmFlags.Signed)     != 0;
            bool round      = (flags & ShrImmFlags.Round)      != 0;
            bool accumulate = (flags & ShrImmFlags.Accumulate) != 0;

            int shift = GetImmShr(op);

            // Rounding adds half of the shifted-out weight before shifting.
            long roundConst = 1L << (shift - 1);

            int bytes = op.GetBitsCount() >> 3;
            int elems = !scalar ? bytes >> op.Size : 1;

            for (int index = 0; index < elems; index++)
            {
                EmitVectorExtract(context, op.Rn, index, op.Size, signed);

                if (op.Size <= 2)
                {
                    // Elements up to 32 bits: the rounding add cannot overflow the
                    // 64-bit IL value, so shift directly.
                    if (round)
                    {
                        context.EmitLdc_I8(roundConst);

                        context.Emit(OpCodes.Add);
                    }

                    context.EmitLdc_I4(shift);

                    context.Emit(signed ? OpCodes.Shr : OpCodes.Shr_Un);
                }
                else /* if (op.Size == 3) */
                {
                    // 64-bit elements: use the software helper so the rounding add
                    // does not lose the carry.
                    EmitShrImm64(context, signed, round ? roundConst : 0L, shift);
                }

                if (accumulate)
                {
                    EmitVectorExtract(context, op.Rd, index, op.Size, signed);

                    context.Emit(OpCodes.Add);
                }

                EmitVectorInsertTmp(context, index, op.Size);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            // Scalar forms and 64-bit vector forms zero the upper half of Rd.
            if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
  515. private static void EmitVectorShrImmNarrowOpZx(ILEmitterCtx context, bool round)
  516. {
  517. OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
  518. int shift = GetImmShr(op);
  519. long roundConst = 1L << (shift - 1);
  520. int elems = 8 >> op.Size;
  521. int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
  522. if (part != 0)
  523. {
  524. context.EmitLdvec(op.Rd);
  525. context.EmitStvectmp();
  526. }
  527. for (int index = 0; index < elems; index++)
  528. {
  529. EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
  530. if (round)
  531. {
  532. context.EmitLdc_I8(roundConst);
  533. context.Emit(OpCodes.Add);
  534. }
  535. context.EmitLdc_I4(shift);
  536. context.Emit(OpCodes.Shr_Un);
  537. EmitVectorInsertTmp(context, part + index, op.Size);
  538. }
  539. context.EmitLdvectmp();
  540. context.EmitStvec(op.Rd);
  541. if (part == 0)
  542. {
  543. EmitVectorZeroUpper(context, op.Rd);
  544. }
  545. }
        // Option bits for EmitShrImmSaturatingNarrowOp.
        [Flags]
        private enum ShrImmSaturatingNarrowFlags
        {
            Scalar    = 1 << 0, // Operate on a single element instead of the whole vector.
            SignedSrc = 1 << 1, // Sign-extend the source elements.
            SignedDst = 1 << 2, // Saturate to the signed destination range.
            Round     = 1 << 3, // Add the rounding constant before shifting.

            ScalarSxSx = Scalar | SignedSrc | SignedDst,
            ScalarSxZx = Scalar | SignedSrc,
            ScalarZxZx = Scalar,

            VectorSxSx = SignedSrc | SignedDst,
            VectorSxZx = SignedSrc,
            VectorZxZx = 0
        }
  560. private static void EmitRoundShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags)
  561. {
  562. EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
  563. }
        // Shared implementation of the saturating shift-right-narrow family
        // (SQSHRN, SQSHRUN, UQSHRN and their rounding variants).
        private static void EmitShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            bool scalar    = (flags & ShrImmSaturatingNarrowFlags.Scalar)    != 0;
            bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
            bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
            bool round     = (flags & ShrImmSaturatingNarrowFlags.Round)     != 0;

            int shift = GetImmShr(op);

            long roundConst = 1L << (shift - 1);

            int elems = !scalar ? 8 >> op.Size : 1;

            // Vector 128-bit form writes the upper half of Rd; preserve the lower half.
            int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;

            if (scalar)
            {
                EmitVectorZeroLowerTmp(context);
            }

            if (part != 0)
            {
                context.EmitLdvec(op.Rd);
                context.EmitStvectmp();
            }

            for (int index = 0; index < elems; index++)
            {
                // Source elements are double-width (op.Size + 1).
                EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);

                if (op.Size <= 1 || !round)
                {
                    // Source <= 32 bits (or no rounding): the rounding add cannot
                    // overflow the 64-bit IL value.
                    if (round)
                    {
                        context.EmitLdc_I8(roundConst);
                        context.Emit(OpCodes.Add);
                    }

                    context.EmitLdc_I4(shift);

                    context.Emit(signedSrc ? OpCodes.Shr : OpCodes.Shr_Un);
                }
                else /* if (op.Size == 2 && round) */
                {
                    // 64-bit source elements with rounding: use the carry-safe helper.
                    EmitShrImm64(context, signedSrc, roundConst, shift); // shift <= 32
                }

                // Saturate to the destination element range, then narrow.
                EmitSatQ(context, op.Size, signedSrc, signedDst);

                EmitVectorInsertTmp(context, part + index, op.Size);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            if (part == 0)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
        // dst64 = (Int(src64, signed) + roundConst) >> shift;
        // Emits a call into SoftFallback so the rounded shift of a full 64-bit value
        // is done in managed code — presumably with a wider intermediate so the add
        // cannot lose the carry (confirm against the SoftFallback implementation).
        private static void EmitShrImm64(
            ILEmitterCtx context,
            bool         signed,
            long         roundConst,
            int          shift)
        {
            // Stack: src64 (already loaded by the caller), roundConst, shift.
            context.EmitLdc_I8(roundConst);
            context.EmitLdc_I4(shift);

            SoftFallback.EmitCall(context, signed
                ? nameof(SoftFallback.SignedShrImm64)
                : nameof(SoftFallback.UnsignedShrImm64));
        }
  624. private static void EmitVectorShImmWidenBinarySx(ILEmitterCtx context, Action emit, int imm)
  625. {
  626. EmitVectorShImmWidenBinaryOp(context, emit, imm, true);
  627. }
  628. private static void EmitVectorShImmWidenBinaryZx(ILEmitterCtx context, Action emit, int imm)
  629. {
  630. EmitVectorShImmWidenBinaryOp(context, emit, imm, false);
  631. }
        // Widening binary op with an immediate: extracts each source element, pushes
        // 'imm', runs 'emit' (e.g. a shift opcode), and inserts the result as a
        // double-width element (op.Size + 1).
        private static void EmitVectorShImmWidenBinaryOp(ILEmitterCtx context, Action emit, int imm, bool signed)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            int elems = 8 >> op.Size;

            // 128-bit register form (e.g. SSHLL2) reads the upper half of the source.
            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);

                context.EmitLdc_I4(imm);

                emit();

                EmitVectorInsertTmp(context, index, op.Size + 1);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);
        }
    }
}