InstEmitSimdShift.cs 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008
  1. // https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
  2. using ChocolArm64.Decoders;
  3. using ChocolArm64.State;
  4. using ChocolArm64.Translation;
  5. using System;
  6. using System.Reflection.Emit;
  7. using System.Runtime.Intrinsics.X86;
  8. using static ChocolArm64.Instructions.InstEmitSimdHelper;
  9. namespace ChocolArm64.Instructions
  10. {
  11. static partial class InstEmit
  12. {
  /// <summary>
  /// Emits the vector shift-right-narrow by immediate with rounding enabled,
  /// delegating to <c>EmitVectorShrImmNarrowOpZx</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Rshrn_V(ILEmitterCtx context)
      => EmitVectorShrImmNarrowOpZx(context, round: true);
  /// <summary>
  /// Emits a scalar shift-left by immediate through <c>EmitScalarUnaryOpZx</c>;
  /// the shift count is <c>GetImmShl(op)</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Shl_S(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      EmitScalarUnaryOpZx(context, () =>
      {
          // Per-element operation: push the immediate count, then emit Shl.
          context.EmitLdc_I4(GetImmShl(shImmOp));
          context.Emit(OpCodes.Shl);
      });
  }
  /// <summary>
  /// Emits a vector shift-left by immediate. When SSE2 is enabled and the element
  /// size is above 0 it calls <c>Sse2.ShiftLeftLogical</c>; otherwise it falls back
  /// to <c>EmitVectorUnaryOpZx</c> with an IL <c>Shl</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Shl_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      if (!Optimizations.UseSse2 || shImmOp.Size <= 0)
      {
          // Generic path.
          EmitVectorUnaryOpZx(context, () =>
          {
              context.EmitLdc_I4(GetImmShl(shImmOp));
              context.Emit(OpCodes.Shl);
          });

          return;
      }

      // SSE2 path: overload selected by (vector type, byte count).
      Type[] shiftSignature = new Type[]
      {
          VectorUIntTypesPerSizeLog2[shImmOp.Size],
          typeof(byte)
      };

      EmitLdvecWithUnsignedCast(context, shImmOp.Rn, shImmOp.Size);

      context.EmitLdc_I4(GetImmShl(shImmOp));
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftSignature));

      EmitStvecWithUnsignedCast(context, shImmOp.Rd, shImmOp.Size);

      if (shImmOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Emits a widening shift-left through <c>EmitVectorShImmWidenBinaryZx</c>,
  /// using a shift count of <c>8 &lt;&lt; op.Size</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimd64</c>.</param>
  public static void Shll_V(ILEmitterCtx context)
  {
      var simdOp = (OpCodeSimd64)context.CurrOp;

      // Shift count is derived from the size field, not from an immediate.
      int shiftCount = 8 << simdOp.Size;

      EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shiftCount);
  }
  /// <summary>
  /// Emits the vector shift-right-narrow by immediate without rounding,
  /// delegating to <c>EmitVectorShrImmNarrowOpZx</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Shrn_V(ILEmitterCtx context)
      => EmitVectorShrImmNarrowOpZx(context, round: false);
  /// <summary>
  /// Emits a vector shift-left-and-insert: for each element the emitted IL computes
  /// <c>(Rn[i] &lt;&lt; shift) | (Rd[i] &amp; mask)</c> and writes it back to <c>Rd[i]</c>,
  /// where <c>mask</c> has the low <c>shift</c> bits set.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Sli_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      int elemCount = (shImmOp.GetBitsCount() >> 3) >> shImmOp.Size;

      int shl = GetImmShl(shImmOp);

      // Low `shl` bits of the destination are kept; a zero shift keeps none
      // (and avoids an out-of-range shift by 64 on the mask).
      ulong keepMask = 0;

      if (shl != 0)
      {
          keepMask = ulong.MaxValue >> (64 - shl);
      }

      for (int i = 0; i < elemCount; i++)
      {
          // Rn[i] << shl
          EmitVectorExtractZx(context, shImmOp.Rn, i, shImmOp.Size);
          context.EmitLdc_I4(shl);
          context.Emit(OpCodes.Shl);

          // Rd[i] & keepMask
          EmitVectorExtractZx(context, shImmOp.Rd, i, shImmOp.Size);
          context.EmitLdc_I8((long)keepMask);
          context.Emit(OpCodes.And);

          // Merge both halves and store into Rd[i].
          context.Emit(OpCodes.Or);
          EmitVectorInsert(context, shImmOp.Rd, i, shImmOp.Size);
      }

      if (shImmOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Emits, per element, a call to <c>SoftFallback.SignedShlRegSatQ</c> with the
  /// sign-extended elements of <c>Rn</c> and <c>Rm</c>, the constant 1, the element size
  /// and the state argument; the result is inserted into <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdReg64</c>.</param>
  public static void Sqrshl_V(ILEmitterCtx context)
  {
      var regOp = (OpCodeSimdReg64)context.CurrOp;

      int elemCount = (regOp.GetBitsCount() >> 3) >> regOp.Size;

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtractSx(context, regOp.Rn, i, regOp.Size);
          EmitVectorExtractSx(context, regOp.Rm, i, regOp.Size);

          // Constant 1 here vs. 0 in Sqshl_V — presumably the rounding flag (confirm in SoftFallback).
          context.Emit(OpCodes.Ldc_I4_1);
          context.EmitLdc_I4(regOp.Size);
          context.EmitLdarg(TranslatedSub.StateArgIdx);

          SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlRegSatQ));

          EmitVectorInsert(context, regOp.Rd, i, regOp.Size);
      }

      if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, regOp.Rd);
      }
  }
  /// <summary>
  /// Emits the rounding saturating shift-right-narrow by immediate with the
  /// <c>ScalarSxSx</c> flags (scalar, signed source, signed destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Sqrshrn_S(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  /// <summary>
  /// Emits the rounding saturating shift-right-narrow by immediate with the
  /// <c>VectorSxSx</c> flags (vector, signed source, signed destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Sqrshrn_V(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  /// <summary>
  /// Emits the rounding saturating shift-right-narrow by immediate with the
  /// <c>ScalarSxZx</c> flags (scalar, signed source, unsigned destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Sqrshrun_S(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  /// <summary>
  /// Emits the rounding saturating shift-right-narrow by immediate with the
  /// <c>VectorSxZx</c> flags (vector, signed source, unsigned destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Sqrshrun_V(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  /// <summary>
  /// Emits, per element, a call to <c>SoftFallback.SignedShlRegSatQ</c> with the
  /// sign-extended elements of <c>Rn</c> and <c>Rm</c>, the constant 0, the element size
  /// and the state argument; the result is inserted into <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdReg64</c>.</param>
  public static void Sqshl_V(ILEmitterCtx context)
  {
      var regOp = (OpCodeSimdReg64)context.CurrOp;

      int elemCount = (regOp.GetBitsCount() >> 3) >> regOp.Size;

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtractSx(context, regOp.Rn, i, regOp.Size);
          EmitVectorExtractSx(context, regOp.Rm, i, regOp.Size);

          // Constant 0 here vs. 1 in Sqrshl_V — presumably the rounding flag (confirm in SoftFallback).
          context.Emit(OpCodes.Ldc_I4_0);
          context.EmitLdc_I4(regOp.Size);
          context.EmitLdarg(TranslatedSub.StateArgIdx);

          SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlRegSatQ));

          EmitVectorInsert(context, regOp.Rd, i, regOp.Size);
      }

      if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, regOp.Rd);
      }
  }
  /// <summary>
  /// Emits the (non-rounding) saturating shift-right-narrow by immediate with the
  /// <c>ScalarSxSx</c> flags (scalar, signed source, signed destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Sqshrn_S(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
  /// <summary>
  /// Emits the (non-rounding) saturating shift-right-narrow by immediate with the
  /// <c>VectorSxSx</c> flags (vector, signed source, signed destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Sqshrn_V(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
  /// <summary>
  /// Emits the (non-rounding) saturating shift-right-narrow by immediate with the
  /// <c>ScalarSxZx</c> flags (scalar, signed source, unsigned destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Sqshrun_S(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
  /// <summary>
  /// Emits the (non-rounding) saturating shift-right-narrow by immediate with the
  /// <c>VectorSxZx</c> flags (vector, signed source, unsigned destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Sqshrun_V(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
  /// <summary>
  /// Emits, per element, a call to <c>SoftFallback.SignedShlReg</c> with the
  /// sign-extended elements of <c>Rn</c> and <c>Rm</c>, the constant 1 and the element
  /// size; the result is inserted into <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdReg64</c>.</param>
  public static void Srshl_V(ILEmitterCtx context)
  {
      var regOp = (OpCodeSimdReg64)context.CurrOp;

      int elemCount = (regOp.GetBitsCount() >> 3) >> regOp.Size;

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtractSx(context, regOp.Rn, i, regOp.Size);
          EmitVectorExtractSx(context, regOp.Rm, i, regOp.Size);

          // Constant 1 here vs. 0 in Sshl_V — presumably the rounding flag (confirm in SoftFallback).
          context.Emit(OpCodes.Ldc_I4_1);
          context.EmitLdc_I4(regOp.Size);

          SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlReg));

          EmitVectorInsert(context, regOp.Rd, i, regOp.Size);
      }

      if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, regOp.Rd);
      }
  }
  /// <summary>
  /// Emits the scalar signed shift-right by immediate with the <c>Round</c> flag set.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Srshr_S(ILEmitterCtx context)
      => EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
  /// <summary>
  /// Emits the vector signed rounding shift-right by immediate. With SSE2 enabled and
  /// an element size of 1 or 2, the emitted sequence computes
  /// <c>((x &lt;&lt; (eSize - shift)) &gt;&gt;&gt; (eSize - 1)) + (x &gt;&gt; shift)</c> using
  /// <c>Sse2</c> calls; otherwise it falls back to <c>EmitVectorShrImmOpSx</c> with
  /// the <c>Round</c> flag.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Srshr_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      bool sseEligible = Optimizations.UseSse2 && shImmOp.Size > 0 && shImmOp.Size < 3;

      if (!sseEligible)
      {
          EmitVectorShrImmOpSx(context, ShrImmFlags.Round);

          return;
      }

      Type[] shiftSignature = new Type[] { VectorIntTypesPerSizeLog2[shImmOp.Size], typeof(byte) };
      Type[] addSignature = new Type[]
      {
          VectorIntTypesPerSizeLog2[shImmOp.Size],
          VectorIntTypesPerSizeLog2[shImmOp.Size]
      };

      int shr = GetImmShr(shImmOp);
      int elemBits = 8 << shImmOp.Size;

      EmitLdvecWithSignedCast(context, shImmOp.Rn, shImmOp.Size);

      // Keep a copy of the source in the temp vector for the second term.
      context.Emit(OpCodes.Dup);
      context.EmitStvectmp();

      // First term: isolate bit (shr - 1) of each element as 0 or 1.
      context.EmitLdc_I4(elemBits - shr);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftSignature));

      context.EmitLdc_I4(elemBits - 1);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftSignature));

      // Second term: arithmetic shift of the saved source.
      context.EmitLdvectmp();

      context.EmitLdc_I4(shr);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftSignature));

      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addSignature));

      EmitStvecWithSignedCast(context, shImmOp.Rd, shImmOp.Size);

      if (shImmOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Emits the scalar signed shift-right by immediate with both the <c>Round</c>
  /// and <c>Accumulate</c> flags set.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Srsra_S(ILEmitterCtx context)
      => EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  /// <summary>
  /// Emits the vector signed rounding shift-right by immediate with accumulate. With
  /// SSE2 enabled and an element size of 1 or 2, the emitted sequence computes
  /// <c>Rd + (((x &lt;&lt; (eSize - shift)) &gt;&gt;&gt; (eSize - 1)) + (x &gt;&gt; shift))</c> with
  /// <c>x = Rn</c>; otherwise it falls back to <c>EmitVectorShrImmOpSx</c> with the
  /// <c>Round</c> and <c>Accumulate</c> flags.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Srsra_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      bool sseEligible = Optimizations.UseSse2 && shImmOp.Size > 0 && shImmOp.Size < 3;

      if (!sseEligible)
      {
          EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);

          return;
      }

      Type[] shiftSignature = new Type[] { VectorIntTypesPerSizeLog2[shImmOp.Size], typeof(byte) };
      Type[] addSignature = new Type[]
      {
          VectorIntTypesPerSizeLog2[shImmOp.Size],
          VectorIntTypesPerSizeLog2[shImmOp.Size]
      };

      int shr = GetImmShr(shImmOp);
      int elemBits = 8 << shImmOp.Size;

      // Accumulator goes on the stack first so the final Add consumes it last.
      EmitLdvecWithSignedCast(context, shImmOp.Rd, shImmOp.Size);
      EmitLdvecWithSignedCast(context, shImmOp.Rn, shImmOp.Size);

      // Keep a copy of the source in the temp vector for the second term.
      context.Emit(OpCodes.Dup);
      context.EmitStvectmp();

      // First term: isolate bit (shr - 1) of each element as 0 or 1.
      context.EmitLdc_I4(elemBits - shr);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftSignature));

      context.EmitLdc_I4(elemBits - 1);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftSignature));

      // Second term: arithmetic shift of the saved source.
      context.EmitLdvectmp();

      context.EmitLdc_I4(shr);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftSignature));

      // Rounded shift result, then accumulate into the loaded Rd.
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addSignature));
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addSignature));

      EmitStvecWithSignedCast(context, shImmOp.Rd, shImmOp.Size);

      if (shImmOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Emits, per element, a call to <c>SoftFallback.SignedShlReg</c> with the
  /// sign-extended elements of <c>Rn</c> and <c>Rm</c>, the constant 0 and the element
  /// size; the result is inserted into <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdReg64</c>.</param>
  public static void Sshl_V(ILEmitterCtx context)
  {
      var regOp = (OpCodeSimdReg64)context.CurrOp;

      int elemCount = (regOp.GetBitsCount() >> 3) >> regOp.Size;

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtractSx(context, regOp.Rn, i, regOp.Size);
          EmitVectorExtractSx(context, regOp.Rm, i, regOp.Size);

          // Constant 0 here vs. 1 in Srshl_V — presumably the rounding flag (confirm in SoftFallback).
          context.Emit(OpCodes.Ldc_I4_0);
          context.EmitLdc_I4(regOp.Size);

          SoftFallback.EmitCall(context, nameof(SoftFallback.SignedShlReg));

          EmitVectorInsert(context, regOp.Rd, i, regOp.Size);
      }

      if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, regOp.Rd);
      }
  }
  /// <summary>
  /// Emits a signed widening shift-left by immediate through
  /// <c>EmitVectorShImmWidenBinarySx</c>, shifting by <c>GetImmShl(op)</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Sshll_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      int shl = GetImmShl(shImmOp);

      EmitVectorShImmWidenBinarySx(context, () => context.Emit(OpCodes.Shl), shl);
  }
  /// <summary>
  /// Emits the scalar signed shift-right by immediate (<c>ScalarSx</c> flags,
  /// no rounding, no accumulate).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Sshr_S(ILEmitterCtx context)
      => EmitShrImmOp(context, ShrImmFlags.ScalarSx);
  /// <summary>
  /// Emits the vector signed shift-right by immediate. With SSE2 enabled and an
  /// element size of 1 or 2 it calls <c>Sse2.ShiftRightArithmetic</c>; otherwise it
  /// falls back to <c>EmitShrImmOp</c> with the <c>VectorSx</c> flags.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Sshr_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      bool sseEligible = Optimizations.UseSse2 && shImmOp.Size > 0 && shImmOp.Size < 3;

      if (!sseEligible)
      {
          EmitShrImmOp(context, ShrImmFlags.VectorSx);

          return;
      }

      Type[] shiftSignature = new Type[] { VectorIntTypesPerSizeLog2[shImmOp.Size], typeof(byte) };

      EmitLdvecWithSignedCast(context, shImmOp.Rn, shImmOp.Size);

      context.EmitLdc_I4(GetImmShr(shImmOp));
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftSignature));

      EmitStvecWithSignedCast(context, shImmOp.Rd, shImmOp.Size);

      if (shImmOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Emits the scalar signed shift-right by immediate with the <c>Accumulate</c> flag set.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Ssra_S(ILEmitterCtx context)
      => EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
  /// <summary>
  /// Emits the vector signed shift-right by immediate with accumulate. With SSE2
  /// enabled and an element size of 1 or 2 the emitted sequence computes
  /// <c>Rd + (Rn &gt;&gt; shift)</c> via <c>Sse2</c> calls; otherwise it falls back to
  /// <c>EmitVectorShrImmOpSx</c> with the <c>Accumulate</c> flag.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Ssra_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      bool sseEligible = Optimizations.UseSse2 && shImmOp.Size > 0 && shImmOp.Size < 3;

      if (!sseEligible)
      {
          EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);

          return;
      }

      Type[] shiftSignature = new Type[] { VectorIntTypesPerSizeLog2[shImmOp.Size], typeof(byte) };
      Type[] addSignature = new Type[]
      {
          VectorIntTypesPerSizeLog2[shImmOp.Size],
          VectorIntTypesPerSizeLog2[shImmOp.Size]
      };

      // Accumulator first, then the shifted source, then Add.
      EmitLdvecWithSignedCast(context, shImmOp.Rd, shImmOp.Size);
      EmitLdvecWithSignedCast(context, shImmOp.Rn, shImmOp.Size);

      context.EmitLdc_I4(GetImmShr(shImmOp));
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftSignature));

      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addSignature));

      EmitStvecWithSignedCast(context, shImmOp.Rd, shImmOp.Size);

      if (shImmOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Emits, per element, a call to <c>SoftFallback.UnsignedShlRegSatQ</c> with the
  /// zero-extended elements of <c>Rn</c> and <c>Rm</c>, the constant 1, the element size
  /// and the state argument; the result is inserted into <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdReg64</c>.</param>
  public static void Uqrshl_V(ILEmitterCtx context)
  {
      var regOp = (OpCodeSimdReg64)context.CurrOp;

      int elemCount = (regOp.GetBitsCount() >> 3) >> regOp.Size;

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtractZx(context, regOp.Rn, i, regOp.Size);
          EmitVectorExtractZx(context, regOp.Rm, i, regOp.Size);

          // Constant 1 here vs. 0 in Uqshl_V — presumably the rounding flag (confirm in SoftFallback).
          context.Emit(OpCodes.Ldc_I4_1);
          context.EmitLdc_I4(regOp.Size);
          context.EmitLdarg(TranslatedSub.StateArgIdx);

          SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlRegSatQ));

          EmitVectorInsert(context, regOp.Rd, i, regOp.Size);
      }

      if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, regOp.Rd);
      }
  }
  /// <summary>
  /// Emits the rounding saturating shift-right-narrow by immediate with the
  /// <c>ScalarZxZx</c> flags (scalar, unsigned source, unsigned destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Uqrshrn_S(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  /// <summary>
  /// Emits the rounding saturating shift-right-narrow by immediate with the
  /// <c>VectorZxZx</c> flags (vector, unsigned source, unsigned destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Uqrshrn_V(ILEmitterCtx context)
      => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  /// <summary>
  /// Emits, per element, a call to <c>SoftFallback.UnsignedShlRegSatQ</c> with the
  /// zero-extended elements of <c>Rn</c> and <c>Rm</c>, the constant 0, the element size
  /// and the state argument; the result is inserted into <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdReg64</c>.</param>
  public static void Uqshl_V(ILEmitterCtx context)
  {
      var regOp = (OpCodeSimdReg64)context.CurrOp;

      int elemCount = (regOp.GetBitsCount() >> 3) >> regOp.Size;

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtractZx(context, regOp.Rn, i, regOp.Size);
          EmitVectorExtractZx(context, regOp.Rm, i, regOp.Size);

          // Constant 0 here vs. 1 in Uqrshl_V — presumably the rounding flag (confirm in SoftFallback).
          context.Emit(OpCodes.Ldc_I4_0);
          context.EmitLdc_I4(regOp.Size);
          context.EmitLdarg(TranslatedSub.StateArgIdx);

          SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlRegSatQ));

          EmitVectorInsert(context, regOp.Rd, i, regOp.Size);
      }

      if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, regOp.Rd);
      }
  }
  /// <summary>
  /// Emits the (non-rounding) saturating shift-right-narrow by immediate with the
  /// <c>ScalarZxZx</c> flags (scalar, unsigned source, unsigned destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Uqshrn_S(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
  /// <summary>
  /// Emits the (non-rounding) saturating shift-right-narrow by immediate with the
  /// <c>VectorZxZx</c> flags (vector, unsigned source, unsigned destination).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Uqshrn_V(ILEmitterCtx context)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
  /// <summary>
  /// Emits, per element, a call to <c>SoftFallback.UnsignedShlReg</c> with the
  /// zero-extended elements of <c>Rn</c> and <c>Rm</c>, the constant 1 and the element
  /// size; the result is inserted into <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdReg64</c>.</param>
  public static void Urshl_V(ILEmitterCtx context)
  {
      var regOp = (OpCodeSimdReg64)context.CurrOp;

      int elemCount = (regOp.GetBitsCount() >> 3) >> regOp.Size;

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtractZx(context, regOp.Rn, i, regOp.Size);
          EmitVectorExtractZx(context, regOp.Rm, i, regOp.Size);

          // Constant 1 here vs. 0 in Ushl_V — presumably the rounding flag (confirm in SoftFallback).
          context.Emit(OpCodes.Ldc_I4_1);
          context.EmitLdc_I4(regOp.Size);

          SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlReg));

          EmitVectorInsert(context, regOp.Rd, i, regOp.Size);
      }

      if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, regOp.Rd);
      }
  }
  /// <summary>
  /// Emits the scalar unsigned shift-right by immediate with the <c>Round</c> flag set.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Urshr_S(ILEmitterCtx context)
      => EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
  /// <summary>
  /// Emits the vector unsigned rounding shift-right by immediate. With SSE2 enabled
  /// and an element size above 0, the emitted sequence computes
  /// <c>((x &lt;&lt; (eSize - shift)) &gt;&gt;&gt; (eSize - 1)) + (x &gt;&gt;&gt; shift)</c> using
  /// <c>Sse2</c> calls; otherwise it falls back to <c>EmitVectorShrImmOpZx</c> with
  /// the <c>Round</c> flag.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Urshr_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      if (!Optimizations.UseSse2 || shImmOp.Size <= 0)
      {
          EmitVectorShrImmOpZx(context, ShrImmFlags.Round);

          return;
      }

      Type[] shiftSignature = new Type[] { VectorUIntTypesPerSizeLog2[shImmOp.Size], typeof(byte) };
      Type[] addSignature = new Type[]
      {
          VectorUIntTypesPerSizeLog2[shImmOp.Size],
          VectorUIntTypesPerSizeLog2[shImmOp.Size]
      };

      int shr = GetImmShr(shImmOp);
      int elemBits = 8 << shImmOp.Size;

      EmitLdvecWithUnsignedCast(context, shImmOp.Rn, shImmOp.Size);

      // Keep a copy of the source in the temp vector for the second term.
      context.Emit(OpCodes.Dup);
      context.EmitStvectmp();

      // First term: isolate bit (shr - 1) of each element as 0 or 1.
      context.EmitLdc_I4(elemBits - shr);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftSignature));

      context.EmitLdc_I4(elemBits - 1);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftSignature));

      // Second term: logical shift of the saved source.
      context.EmitLdvectmp();

      context.EmitLdc_I4(shr);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftSignature));

      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addSignature));

      EmitStvecWithUnsignedCast(context, shImmOp.Rd, shImmOp.Size);

      if (shImmOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Emits the scalar unsigned shift-right by immediate with both the <c>Round</c>
  /// and <c>Accumulate</c> flags set.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Ursra_S(ILEmitterCtx context)
      => EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
  /// <summary>
  /// Emits the vector unsigned rounding shift-right by immediate with accumulate. With
  /// SSE2 enabled and an element size above 0, the emitted sequence computes
  /// <c>Rd + (((x &lt;&lt; (eSize - shift)) &gt;&gt;&gt; (eSize - 1)) + (x &gt;&gt;&gt; shift))</c> with
  /// <c>x = Rn</c>; otherwise it falls back to <c>EmitVectorShrImmOpZx</c> with the
  /// <c>Round</c> and <c>Accumulate</c> flags.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Ursra_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      if (!Optimizations.UseSse2 || shImmOp.Size <= 0)
      {
          EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);

          return;
      }

      Type[] shiftSignature = new Type[] { VectorUIntTypesPerSizeLog2[shImmOp.Size], typeof(byte) };
      Type[] addSignature = new Type[]
      {
          VectorUIntTypesPerSizeLog2[shImmOp.Size],
          VectorUIntTypesPerSizeLog2[shImmOp.Size]
      };

      int shr = GetImmShr(shImmOp);
      int elemBits = 8 << shImmOp.Size;

      // Accumulator goes on the stack first so the final Add consumes it last.
      EmitLdvecWithUnsignedCast(context, shImmOp.Rd, shImmOp.Size);
      EmitLdvecWithUnsignedCast(context, shImmOp.Rn, shImmOp.Size);

      // Keep a copy of the source in the temp vector for the second term.
      context.Emit(OpCodes.Dup);
      context.EmitStvectmp();

      // First term: isolate bit (shr - 1) of each element as 0 or 1.
      context.EmitLdc_I4(elemBits - shr);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftSignature));

      context.EmitLdc_I4(elemBits - 1);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftSignature));

      // Second term: logical shift of the saved source.
      context.EmitLdvectmp();

      context.EmitLdc_I4(shr);
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftSignature));

      // Rounded shift result, then accumulate into the loaded Rd.
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addSignature));
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addSignature));

      EmitStvecWithUnsignedCast(context, shImmOp.Rd, shImmOp.Size);

      if (shImmOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Emits, per element, a call to <c>SoftFallback.UnsignedShlReg</c> with the
  /// zero-extended elements of <c>Rn</c> and <c>Rm</c>, the constant 0 and the element
  /// size; the result is inserted into <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdReg64</c>.</param>
  public static void Ushl_V(ILEmitterCtx context)
  {
      var regOp = (OpCodeSimdReg64)context.CurrOp;

      int elemCount = (regOp.GetBitsCount() >> 3) >> regOp.Size;

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtractZx(context, regOp.Rn, i, regOp.Size);
          EmitVectorExtractZx(context, regOp.Rm, i, regOp.Size);

          // Constant 0 here vs. 1 in Urshl_V — presumably the rounding flag (confirm in SoftFallback).
          context.Emit(OpCodes.Ldc_I4_0);
          context.EmitLdc_I4(regOp.Size);

          SoftFallback.EmitCall(context, nameof(SoftFallback.UnsignedShlReg));

          EmitVectorInsert(context, regOp.Rd, i, regOp.Size);
      }

      if (regOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, regOp.Rd);
      }
  }
  /// <summary>
  /// Emits an unsigned widening shift-left by immediate through
  /// <c>EmitVectorShImmWidenBinaryZx</c>, shifting by <c>GetImmShl(op)</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Ushll_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      int shl = GetImmShl(shImmOp);

      EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shl);
  }
  /// <summary>
  /// Emits the scalar unsigned shift-right by immediate (<c>ScalarZx</c> flags,
  /// no rounding, no accumulate).
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Ushr_S(ILEmitterCtx context)
      => EmitShrImmOp(context, ShrImmFlags.ScalarZx);
  /// <summary>
  /// Emits the vector unsigned shift-right by immediate. With SSE2 enabled and an
  /// element size above 0 it calls <c>Sse2.ShiftRightLogical</c>; otherwise it falls
  /// back to <c>EmitShrImmOp</c> with the <c>VectorZx</c> flags.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Ushr_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      if (!Optimizations.UseSse2 || shImmOp.Size <= 0)
      {
          EmitShrImmOp(context, ShrImmFlags.VectorZx);

          return;
      }

      Type[] shiftSignature = new Type[] { VectorUIntTypesPerSizeLog2[shImmOp.Size], typeof(byte) };

      EmitLdvecWithUnsignedCast(context, shImmOp.Rn, shImmOp.Size);

      context.EmitLdc_I4(GetImmShr(shImmOp));
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftSignature));

      EmitStvecWithUnsignedCast(context, shImmOp.Rd, shImmOp.Size);

      if (shImmOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Emits the scalar unsigned shift-right by immediate with the <c>Accumulate</c> flag set.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is the instruction being emitted.</param>
  public static void Usra_S(ILEmitterCtx context)
      => EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
  /// <summary>
  /// Emits the vector unsigned shift-right by immediate with accumulate. With SSE2
  /// enabled and an element size above 0 the emitted sequence computes
  /// <c>Rd + (Rn &gt;&gt;&gt; shift)</c> via <c>Sse2</c> calls; otherwise it falls back to
  /// <c>EmitVectorShrImmOpZx</c> with the <c>Accumulate</c> flag.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  public static void Usra_V(ILEmitterCtx context)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      if (!Optimizations.UseSse2 || shImmOp.Size <= 0)
      {
          EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);

          return;
      }

      Type[] shiftSignature = new Type[] { VectorUIntTypesPerSizeLog2[shImmOp.Size], typeof(byte) };
      Type[] addSignature = new Type[]
      {
          VectorUIntTypesPerSizeLog2[shImmOp.Size],
          VectorUIntTypesPerSizeLog2[shImmOp.Size]
      };

      // Accumulator first, then the shifted source, then Add.
      EmitLdvecWithUnsignedCast(context, shImmOp.Rd, shImmOp.Size);
      EmitLdvecWithUnsignedCast(context, shImmOp.Rn, shImmOp.Size);

      context.EmitLdc_I4(GetImmShr(shImmOp));
      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftSignature));

      context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addSignature));

      EmitStvecWithUnsignedCast(context, shImmOp.Rd, shImmOp.Size);

      if (shImmOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Option bits consumed by <c>EmitShrImmOp</c>, plus named combinations of the
  /// <c>Scalar</c> and <c>Signed</c> bits.
  /// </summary>
  [Flags]
  private enum ShrImmFlags
  {
      // Individual option bits.
      Scalar     = 0x1,
      Signed     = 0x2,
      Round      = 0x4,
      Accumulate = 0x8,

      // Scalar/vector x signed/unsigned combinations.
      ScalarSx = Scalar | Signed,
      ScalarZx = Scalar,
      VectorSx = Signed,
      VectorZx = 0
  }
  /// <summary>
  /// Calls <c>EmitShrImmOp</c> with <c>ShrImmFlags.ScalarSx</c> OR-ed into <paramref name="flags"/>.
  /// </summary>
  /// <param name="context">Emitter context forwarded to <c>EmitShrImmOp</c>.</param>
  /// <param name="flags">Additional option bits (e.g. <c>Round</c>, <c>Accumulate</c>).</param>
  private static void EmitScalarShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags)
      => EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
  /// <summary>
  /// Calls <c>EmitShrImmOp</c> with <c>ShrImmFlags.ScalarZx</c> OR-ed into <paramref name="flags"/>.
  /// </summary>
  /// <param name="context">Emitter context forwarded to <c>EmitShrImmOp</c>.</param>
  /// <param name="flags">Additional option bits (e.g. <c>Round</c>, <c>Accumulate</c>).</param>
  private static void EmitScalarShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags)
      => EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
  /// <summary>
  /// Calls <c>EmitShrImmOp</c> with <c>ShrImmFlags.VectorSx</c> OR-ed into <paramref name="flags"/>.
  /// </summary>
  /// <param name="context">Emitter context forwarded to <c>EmitShrImmOp</c>.</param>
  /// <param name="flags">Additional option bits (e.g. <c>Round</c>, <c>Accumulate</c>).</param>
  private static void EmitVectorShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags)
      => EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
  /// <summary>
  /// Calls <c>EmitShrImmOp</c> with <c>ShrImmFlags.VectorZx</c> OR-ed into <paramref name="flags"/>.
  /// </summary>
  /// <param name="context">Emitter context forwarded to <c>EmitShrImmOp</c>.</param>
  /// <param name="flags">Additional option bits (e.g. <c>Round</c>, <c>Accumulate</c>).</param>
  private static void EmitVectorShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags)
      => EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
  /// <summary>
  /// Generic emitter for shift-right by immediate. For each element of <c>Rn</c> it
  /// emits an optional rounding add, the shift (signed or unsigned), and an optional
  /// add of the matching <c>Rd</c> element, collecting results in the temp vector
  /// before storing it to <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  /// <param name="flags">Selects scalar/vector, signed/unsigned, rounding and accumulate.</param>
  private static void EmitShrImmOp(ILEmitterCtx context, ShrImmFlags flags)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      bool isScalar     = flags.HasFlag(ShrImmFlags.Scalar);
      bool isSigned     = flags.HasFlag(ShrImmFlags.Signed);
      bool doRound      = flags.HasFlag(ShrImmFlags.Round);
      bool doAccumulate = flags.HasFlag(ShrImmFlags.Accumulate);

      int shr = GetImmShr(shImmOp);

      // Rounding constant: a single bit just below the shift boundary.
      long rounding = 1L << (shr - 1);

      int byteCount = shImmOp.GetBitsCount() >> 3;
      int elemCount = isScalar ? 1 : byteCount >> shImmOp.Size;

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtract(context, shImmOp.Rn, i, shImmOp.Size, isSigned);

          if (shImmOp.Size > 2)
          {
              // Size == 3: the add + shift is delegated to the SoftFallback helpers.
              EmitShrImm64(context, isSigned, doRound ? rounding : 0L, shr);
          }
          else
          {
              if (doRound)
              {
                  context.EmitLdc_I8(rounding);
                  context.Emit(OpCodes.Add);
              }

              context.EmitLdc_I4(shr);
              context.Emit(isSigned ? OpCodes.Shr : OpCodes.Shr_Un);
          }

          if (doAccumulate)
          {
              EmitVectorExtract(context, shImmOp.Rd, i, shImmOp.Size, isSigned);
              context.Emit(OpCodes.Add);
          }

          EmitVectorInsertTmp(context, i, shImmOp.Size);
      }

      context.EmitLdvectmp();
      context.EmitStvec(shImmOp.Rd);

      if (isScalar || shImmOp.RegisterSize == RegisterSize.Simd64)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Emits a narrowing shift-right by immediate: each source element of size
  /// <c>op.Size + 1</c> is zero-extended, optionally rounded, shifted with
  /// <c>Shr_Un</c> and inserted as an element of size <c>op.Size</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  /// <param name="round">When true, <c>1 &lt;&lt; (shift - 1)</c> is added before shifting.</param>
  private static void EmitVectorShrImmNarrowOpZx(ILEmitterCtx context, bool round)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      int shr = GetImmShr(shImmOp);
      long rounding = 1L << (shr - 1);

      int elemCount = 8 >> shImmOp.Size;

      // For a Simd128 register size the results are written starting at element
      // index elemCount, otherwise starting at index 0.
      int dstBase = 0;

      if (shImmOp.RegisterSize == RegisterSize.Simd128)
      {
          dstBase = elemCount;
      }

      if (dstBase != 0)
      {
          // Seed the temp vector with the current Rd so untouched elements are kept.
          context.EmitLdvec(shImmOp.Rd);
          context.EmitStvectmp();
      }

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtractZx(context, shImmOp.Rn, i, shImmOp.Size + 1);

          if (round)
          {
              context.EmitLdc_I8(rounding);
              context.Emit(OpCodes.Add);
          }

          context.EmitLdc_I4(shr);
          context.Emit(OpCodes.Shr_Un);

          EmitVectorInsertTmp(context, dstBase + i, shImmOp.Size);
      }

      context.EmitLdvectmp();
      context.EmitStvec(shImmOp.Rd);

      if (dstBase == 0)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Option bits consumed by <c>EmitShrImmSaturatingNarrowOp</c>, plus named
  /// combinations of the <c>Scalar</c>, <c>SignedSrc</c> and <c>SignedDst</c> bits.
  /// </summary>
  [Flags]
  private enum ShrImmSaturatingNarrowFlags
  {
      // Individual option bits.
      Scalar    = 0x1,
      SignedSrc = 0x2,
      SignedDst = 0x4,
      Round     = 0x8,

      // Scalar/vector x source signedness x destination signedness.
      ScalarSxSx = Scalar | SignedSrc | SignedDst,
      ScalarSxZx = Scalar | SignedSrc,
      ScalarZxZx = Scalar,
      VectorSxSx = SignedSrc | SignedDst,
      VectorSxZx = SignedSrc,
      VectorZxZx = 0
  }
  /// <summary>
  /// Calls <c>EmitShrImmSaturatingNarrowOp</c> with the <c>Round</c> bit OR-ed into
  /// <paramref name="flags"/>.
  /// </summary>
  /// <param name="context">Emitter context forwarded to <c>EmitShrImmSaturatingNarrowOp</c>.</param>
  /// <param name="flags">Scalar/vector and signedness selection.</param>
  private static void EmitRoundShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags)
      => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
  /// <summary>
  /// Emits a saturating narrowing shift-right by immediate: each source element of
  /// size <c>op.Size + 1</c> is extracted (signed or unsigned), optionally rounded,
  /// shifted, passed through <c>EmitSatQ</c> and inserted as an element of size
  /// <c>op.Size</c> in the temp vector, which is then stored to <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimdShImm64</c>.</param>
  /// <param name="flags">Selects scalar/vector, source/destination signedness and rounding.</param>
  private static void EmitShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags)
  {
      var shImmOp = (OpCodeSimdShImm64)context.CurrOp;

      bool isScalar    = flags.HasFlag(ShrImmSaturatingNarrowFlags.Scalar);
      bool srcIsSigned = flags.HasFlag(ShrImmSaturatingNarrowFlags.SignedSrc);
      bool dstIsSigned = flags.HasFlag(ShrImmSaturatingNarrowFlags.SignedDst);
      bool doRound     = flags.HasFlag(ShrImmSaturatingNarrowFlags.Round);

      int shr = GetImmShr(shImmOp);
      long rounding = 1L << (shr - 1);

      int elemCount = isScalar ? 1 : 8 >> shImmOp.Size;

      // Only the vector form with a Simd128 register size writes at an offset.
      int dstBase = 0;

      if (!isScalar && shImmOp.RegisterSize == RegisterSize.Simd128)
      {
          dstBase = elemCount;
      }

      if (isScalar)
      {
          EmitVectorZeroLowerTmp(context);
      }

      if (dstBase != 0)
      {
          // Seed the temp vector with the current Rd so untouched elements are kept.
          context.EmitLdvec(shImmOp.Rd);
          context.EmitStvectmp();
      }

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtract(context, shImmOp.Rn, i, shImmOp.Size + 1, srcIsSigned);

          if (doRound && shImmOp.Size > 1)
          {
              // op.Size == 2 with rounding: delegated to the SoftFallback helpers.
              EmitShrImm64(context, srcIsSigned, rounding, shr); // shift <= 32
          }
          else
          {
              if (doRound)
              {
                  context.EmitLdc_I8(rounding);
                  context.Emit(OpCodes.Add);
              }

              context.EmitLdc_I4(shr);
              context.Emit(srcIsSigned ? OpCodes.Shr : OpCodes.Shr_Un);
          }

          EmitSatQ(context, shImmOp.Size, srcIsSigned, dstIsSigned);

          EmitVectorInsertTmp(context, dstBase + i, shImmOp.Size);
      }

      context.EmitLdvectmp();
      context.EmitStvec(shImmOp.Rd);

      if (dstBase == 0)
      {
          EmitVectorZeroUpper(context, shImmOp.Rd);
      }
  }
  /// <summary>
  /// Emits <c>dst64 = (Int(src64, signed) + roundConst) &gt;&gt; shift</c> by pushing
  /// <paramref name="roundConst"/> and <paramref name="shift"/> and calling
  /// <c>SoftFallback.SignedShrImm64</c> or <c>SoftFallback.UnsignedShrImm64</c>.
  /// The source value is expected to be on the evaluation stack already
  /// (callers in this file extract it just before calling).
  /// </summary>
  /// <param name="context">Emitter context that receives the IL.</param>
  /// <param name="signed">Selects the signed or the unsigned fallback.</param>
  /// <param name="roundConst">Constant pushed as the second fallback argument.</param>
  /// <param name="shift">Shift count pushed as the third fallback argument.</param>
  private static void EmitShrImm64(
      ILEmitterCtx context,
      bool signed,
      long roundConst,
      int shift)
  {
      string fallbackName;

      if (signed)
      {
          fallbackName = nameof(SoftFallback.SignedShrImm64);
      }
      else
      {
          fallbackName = nameof(SoftFallback.UnsignedShrImm64);
      }

      context.EmitLdc_I8(roundConst);
      context.EmitLdc_I4(shift);

      SoftFallback.EmitCall(context, fallbackName);
  }
  /// <summary>
  /// Signed variant: forwards to <c>EmitVectorShImmWidenBinaryOp</c> with <c>signed = true</c>.
  /// </summary>
  /// <param name="context">Emitter context forwarded to the shared implementation.</param>
  /// <param name="emit">Callback that emits the binary operation.</param>
  /// <param name="imm">Immediate pushed as the second operand.</param>
  private static void EmitVectorShImmWidenBinarySx(ILEmitterCtx context, Action emit, int imm)
      => EmitVectorShImmWidenBinaryOp(context, emit, imm, true);
  /// <summary>
  /// Unsigned variant: forwards to <c>EmitVectorShImmWidenBinaryOp</c> with <c>signed = false</c>.
  /// </summary>
  /// <param name="context">Emitter context forwarded to the shared implementation.</param>
  /// <param name="emit">Callback that emits the binary operation.</param>
  /// <param name="imm">Immediate pushed as the second operand.</param>
  private static void EmitVectorShImmWidenBinaryZx(ILEmitterCtx context, Action emit, int imm)
      => EmitVectorShImmWidenBinaryOp(context, emit, imm, false);
  /// <summary>
  /// Emits a widening binary operation with an immediate: each source element of size
  /// <c>op.Size</c> is extracted from <c>Rn</c>, <paramref name="imm"/> is pushed,
  /// <paramref name="emit"/> is invoked, and the result is inserted as an element of
  /// size <c>op.Size + 1</c> in the temp vector, which is then stored to <c>Rd</c>.
  /// </summary>
  /// <param name="context">Emitter context; its <c>CurrOp</c> is cast to <c>OpCodeSimd64</c>.</param>
  /// <param name="emit">Callback that emits the binary operation.</param>
  /// <param name="imm">Immediate pushed as the second operand.</param>
  /// <param name="signed">Whether source elements are extracted as signed.</param>
  private static void EmitVectorShImmWidenBinaryOp(ILEmitterCtx context, Action emit, int imm, bool signed)
  {
      var simdOp = (OpCodeSimd64)context.CurrOp;

      int elemCount = 8 >> simdOp.Size;

      // For a Simd128 register size the source elements start at index elemCount.
      int srcBase = 0;

      if (simdOp.RegisterSize == RegisterSize.Simd128)
      {
          srcBase = elemCount;
      }

      for (int i = 0; i < elemCount; i++)
      {
          EmitVectorExtract(context, simdOp.Rn, srcBase + i, simdOp.Size, signed);

          context.EmitLdc_I4(imm);

          emit();

          EmitVectorInsertTmp(context, i, simdOp.Size + 1);
      }

      context.EmitLdvectmp();
      context.EmitStvec(simdOp.Rd);
  }
  749. }
  750. }